src.slune.utils
import os
from typing import List, Optional, Tuple


def find_directory_path(strings: List[str], root_directory: Optional[str]='.') -> str:
    """ Searches the root directory for a path of directories that matches the strings given in any order.

    If only a partial match is found, returns the deepest matching path.
    If no matches are found returns root_directory.
    Returns a stripped matching path of directories, i.e. where we convert '--string=value' to '--string='.

    Args:
        - strings (list of str): List of strings to be matched in any order. Each string in list must be in the form '--string='.
        - root_directory (string, optional): Path to the root directory to be searched, default is current working directory.

    Returns:
        - max_path (string): root_directory joined with the stripped form of the deepest matching path, or root_directory itself when nothing matches.

    """

    def _strip_value(name):
        # '--string=value' -> '--string='
        return name.split('=')[0].strip() + '='

    def _find_directory_path(curr_strings, curr_root, depth, max_depth, max_path):
        # The context manager releases the scandir handle promptly;
        # sorting makes the choice between equally deep paths deterministic.
        with os.scandir(curr_root) as entries:
            dir_list = sorted(entry.name for entry in entries if entry.is_dir())
        stripped_dirs = {_strip_value(d) for d in dir_list}
        for string in curr_strings:
            if string not in stripped_dirs:
                continue
            remaining = [s for s in curr_strings if s != string]
            # Filter into a fresh list: overwriting dir_list here would hide the
            # directories that belong to every later string in curr_strings.
            for d in [name for name in dir_list if name.startswith(string)]:
                new_depth, new_path = _find_directory_path(
                    remaining, os.path.join(curr_root, d), depth + 1, max_depth, max_path)
                if new_depth > max_depth:
                    max_depth, max_path = new_depth, new_path
        if depth > max_depth:
            max_depth, max_path = depth, curr_root
        return max_depth, max_path

    max_depth, max_path = _find_directory_path(strings, root_directory, 0, -1, '')
    if max_depth > 0:
        # relpath copes with a trailing separator on root_directory,
        # which slicing by len(root_directory) does not.
        relative = os.path.relpath(max_path, root_directory)
        dirs = [_strip_value(d) for d in relative.split(os.path.sep)]
        max_path = os.path.join(root_directory, *dirs)
    return max_path
def get_numeric_equiv(og_path: str, root_directory: Optional[str]='.') -> str:
    """ Replaces directories in path with existing directories with the same numerical value.

    A component '--string=value' that does not exist on disk is swapped for an existing
    sibling directory that has the same '--string' key and a numerically equal value
    (e.g. '--lr=1' -> '--lr=1.0'). Components without '=' or with a non-numeric value,
    and components with no numeric equivalent, are kept as given.

    Args:
        - og_path (str): Path we want to check against existing paths, must be a subdirectory of root_directory and each directory must have form '--string=value'.
        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.

    Returns:
        - equiv (str): Path with values changed to match existing directories if values are numerically equivalent, with root directory at beginning.

    """

    def _key_and_number(name):
        # Returns (key, float(value)) for 'key=<number>' names, otherwise None.
        key, sep, value = name.partition('=')
        if not sep:
            return None
        try:
            return key.strip(), float(value)
        except ValueError:
            return None

    equiv = root_directory
    for d in og_path.split(os.path.sep):
        next_dir = os.path.join(equiv, d)
        # Only look for a numeric equivalent when the exact component is missing.
        wanted = None if os.path.exists(next_dir) else _key_and_number(d)
        if wanted is not None and os.path.isdir(equiv):
            with os.scandir(equiv) as entries:
                candidates = sorted(entry.name for entry in entries if entry.is_dir())
            for existing_dir in candidates:
                # Comparing the key as well stops '--a=1' being replaced by '--b=1.0';
                # siblings without '=' yield None and are skipped instead of crashing.
                if _key_and_number(existing_dir) == wanted:
                    next_dir = os.path.join(equiv, existing_dir)
                    break
        equiv = next_dir
    return equiv


def dict_to_strings(d: dict) -> List[str]:
    """ Converts a dictionary into a list of strings in the form of '--key=value'.

    Keys that already start with '--' are not prefixed a second time.

    Args:
        - d (dict): Dictionary to be converted.

    Returns:
        - out (list of str): List of strings in the form of '--key=value'.

    """

    return [
        '{}={}'.format(key if key.startswith('--') else '--' + key, value)
        for key, value in d.items()
    ]


def find_csv_files(root_directory: Optional[str]='.') -> List[str]:
    """ Recursively finds all csv files in all subdirectories of the root directory and returns their paths.

    Args:
        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.

    Returns:
        - csv_files (list of str): List of strings containing the paths to all csv files found.

    """

    return [
        os.path.join(folder, name)
        for folder, _, names in os.walk(root_directory)
        for name in names
        if name.endswith('.csv')
    ]


def get_all_paths(dirs: List[str], root_directory: Optional[str]='.') -> List[str]:
    """ Find all csv file paths that contain every one of the given directory names.

    A csv file under root_directory is kept only if each entry of dirs appears as an
    exact component of its path.

    Args:
        - dirs (list of str): List of directory names we want returned paths to have in their path.
        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.

    Returns:
        - matches (list of str): List of strings containing the paths to all csv files found.

    """

    def _has_all_dirs(csv_path):
        components = csv_path.split(os.path.sep)
        return all(wanted in components for wanted in dirs)

    return [csv_path for csv_path in find_csv_files(root_directory) if _has_all_dirs(csv_path)]
def find_directory_path(strings: List[str], root_directory: Optional[str]='.') -> str:
    """ Searches the root directory for a path of directories that matches the strings given in any order.

    If only a partial match is found, returns the deepest matching path.
    If no matches are found returns root_directory.
    Returns a stripped matching path of directories, i.e. where we convert '--string=value' to '--string='.

    Args:
        - strings (list of str): List of strings to be matched in any order. Each string in list must be in the form '--string='.
        - root_directory (string, optional): Path to the root directory to be searched, default is current working directory.

    Returns:
        - max_path (string): root_directory joined with the stripped form of the deepest matching path, or root_directory itself when nothing matches.

    """

    def _strip_value(name):
        # '--string=value' -> '--string='
        return name.split('=')[0].strip() + '='

    def _find_directory_path(curr_strings, curr_root, depth, max_depth, max_path):
        # The context manager releases the scandir handle promptly;
        # sorting makes the choice between equally deep paths deterministic.
        with os.scandir(curr_root) as entries:
            dir_list = sorted(entry.name for entry in entries if entry.is_dir())
        stripped_dirs = {_strip_value(d) for d in dir_list}
        for string in curr_strings:
            if string not in stripped_dirs:
                continue
            remaining = [s for s in curr_strings if s != string]
            # Filter into a fresh list: overwriting dir_list here would hide the
            # directories that belong to every later string in curr_strings.
            for d in [name for name in dir_list if name.startswith(string)]:
                new_depth, new_path = _find_directory_path(
                    remaining, os.path.join(curr_root, d), depth + 1, max_depth, max_path)
                if new_depth > max_depth:
                    max_depth, max_path = new_depth, new_path
        if depth > max_depth:
            max_depth, max_path = depth, curr_root
        return max_depth, max_path

    max_depth, max_path = _find_directory_path(strings, root_directory, 0, -1, '')
    if max_depth > 0:
        # relpath copes with a trailing separator on root_directory,
        # which slicing by len(root_directory) does not.
        relative = os.path.relpath(max_path, root_directory)
        dirs = [_strip_value(d) for d in relative.split(os.path.sep)]
        max_path = os.path.join(root_directory, *dirs)
    return max_path
Searches the root directory for a path of directories that matches the given strings in any order. If only a partial match is found, returns the deepest matching path. If no matches are found, returns root_directory. The returned path is stripped of values, i.e. each '--string=value' directory is converted to '--string='.
Arguments:
- strings (list of str): List of strings to be matched in any order. Each string in the list must be in the form '--string='.
- root_directory (string, optional): Path to the root directory to be searched, default is current working directory.
Returns:
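- max_path (string): root_directory joined with the stripped form of the deepest matching path (root_directory itself if nothing matches). The depth of the match is used internally during the search but is not returned.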
def get_numeric_equiv(og_path: str, root_directory: Optional[str]='.') -> str:
    """ Replaces directories in path with existing directories with the same numerical value.

    A component '--string=value' that does not exist on disk is swapped for an existing
    sibling directory that has the same '--string' key and a numerically equal value
    (e.g. '--lr=1' -> '--lr=1.0'). Components without '=' or with a non-numeric value,
    and components with no numeric equivalent, are kept as given.

    Args:
        - og_path (str): Path we want to check against existing paths, must be a subdirectory of root_directory and each directory must have form '--string=value'.
        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.

    Returns:
        - equiv (str): Path with values changed to match existing directories if values are numerically equivalent, with root directory at beginning.

    """

    def _key_and_number(name):
        # Returns (key, float(value)) for 'key=<number>' names, otherwise None.
        key, sep, value = name.partition('=')
        if not sep:
            return None
        try:
            return key.strip(), float(value)
        except ValueError:
            return None

    equiv = root_directory
    for d in og_path.split(os.path.sep):
        next_dir = os.path.join(equiv, d)
        # Only look for a numeric equivalent when the exact component is missing.
        wanted = None if os.path.exists(next_dir) else _key_and_number(d)
        if wanted is not None and os.path.isdir(equiv):
            with os.scandir(equiv) as entries:
                candidates = sorted(entry.name for entry in entries if entry.is_dir())
            for existing_dir in candidates:
                # Comparing the key as well stops '--a=1' being replaced by '--b=1.0';
                # siblings without '=' yield None and are skipped instead of crashing.
                if _key_and_number(existing_dir) == wanted:
                    next_dir = os.path.join(equiv, existing_dir)
                    break
        equiv = next_dir
    return equiv
Replaces directories in path with existing directories with the same numerical value.
Arguments:
- og_path (str): Path we want to check against existing paths; it must be a subdirectory of root_directory and each directory must have the form '--string=value'.
- root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
Returns:
- equiv (str): Path with values changed to match existing directories if values are numerically equivalent, with root directory at beginning.
def dict_to_strings(d: dict) -> List[str]:
    """ Converts a dictionary into a list of strings in the form of '--key=value'.

    Keys that already start with '--' are not prefixed a second time.

    Args:
        - d (dict): Dictionary to be converted.

    Returns:
        - out (list of str): List of strings in the form of '--key=value'.

    """

    return [
        '{}={}'.format(key if key.startswith('--') else '--' + key, value)
        for key, value in d.items()
    ]
Converts a dictionary into a list of strings in the form of '--key=value'.
Arguments:
- d (dict): Dictionary to be converted.
Returns:
- out (list of str): List of strings in the form of '--key=value'.
def find_csv_files(root_directory: Optional[str]='.') -> List[str]:
    """ Recursively finds all csv files in all subdirectories of the root directory and returns their paths.

    Args:
        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.

    Returns:
        - csv_files (list of str): List of strings containing the paths to all csv files found.

    """

    return [
        os.path.join(folder, name)
        for folder, _, names in os.walk(root_directory)
        for name in names
        if name.endswith('.csv')
    ]
Recursively finds all csv files in all subdirectories of the root directory and returns their paths.
Arguments:
- root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
Returns:
- csv_files (list of str): List of strings containing the paths to all csv files found.
def get_all_paths(dirs: List[str], root_directory: Optional[str]='.') -> List[str]:
    """ Find all csv file paths that contain every one of the given directory names.

    A csv file under root_directory is kept only if each entry of dirs appears as an
    exact component of its path.

    Args:
        - dirs (list of str): List of directory names we want returned paths to have in their path.
        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.

    Returns:
        - matches (list of str): List of strings containing the paths to all csv files found.

    """

    def _has_all_dirs(csv_path):
        components = csv_path.split(os.path.sep)
        return all([wanted in components for wanted in dirs])

    return [csv_path for csv_path in find_csv_files(root_directory) if _has_all_dirs(csv_path)]
Find all paths of csv files whose path contains a directory matching each of the given parameters.
Finds all paths of csv files in all subdirectories of the root directory that have a directory in their path matching one of each of all the parameters given.
Arguments:
- dirs (list of str): List of directory names we want returned paths to have in their path.
- root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
Returns:
- matches (list of str): List of strings containing the paths to all csv files found.