src.slune.utils

  1import os
  2from typing import List, Optional, Tuple
  3
  4def find_directory_path(strings: List[str], root_directory: Optional[str]='.') -> Tuple[int, str]:
  5    """ Searches the root directory for a path of directories that matches the strings given in any order.
  6    If only a partial match is found, returns the deepest matching path.
  7    If no matches are found returns root_directory.
  8    Returns a stripped matching path of directories, ie. where we convert '--string=value' to '--string='.
  9
 10    Args:
 11        - strings (list of str): List of strings to be matched in any order. Each string in list must be in the form '--string='.
 12        - root_directory (string, optional): Path to the root directory to be searched, default is current working directory.
 13    
 14    Returns:
 15        - max_depth (int): Depth of the deepest matching path.
 16        - max_path (string): Path of the deepest matching path.
 17    
 18    """
 19
 20    def _find_directory_path(curr_strings, curr_root, depth, max_depth, max_path):
 21        dir_list = [entry.name for entry in os.scandir(curr_root) if entry.is_dir()]
 22        stripped_dir_list = [d.split('=')[0].strip() +"=" for d in dir_list]
 23        stripped_dir_list = list(set(stripped_dir_list))
 24        for string in curr_strings:
 25            if string in stripped_dir_list:
 26                dir_list = [d for d in dir_list if d.startswith(string)]
 27                for d in dir_list:
 28                    new_depth, new_path = _find_directory_path([s for s in curr_strings if s != string], os.path.join(curr_root, d), depth + 1, max_depth, max_path)
 29                    if new_depth > max_depth:
 30                        max_depth, max_path = new_depth, new_path
 31        if depth > max_depth:
 32            max_depth, max_path = depth, curr_root
 33        return max_depth, max_path
 34
 35    max_depth, max_path = _find_directory_path(strings, root_directory, 0, -1, '')
 36    if max_depth > 0:
 37        max_path = max_path[len(root_directory):]
 38        dirs = max_path[1:].split(os.path.sep)
 39        dirs = [d.split('=')[0].strip() +"=" for d in dirs]
 40        max_path = os.path.join(*dirs)
 41        max_path = os.path.join(root_directory, max_path)
 42    return max_path
 43
 44def get_numeric_equiv(og_path: str, root_directory: Optional[str]='.') -> str:
 45    """ Replaces directories in path with existing directories with the same numerical value.
 46
 47    Args:
 48        - og_path (str): Path we want to check against existing paths, must be a subdirectory of root_directory and each directory must have form '--string=value'.
 49        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
 50    
 51    Returns:
 52        - equiv (str): Path with values changed to match existing directories if values are numerically equivalent, with root directory at beginning.
 53
 54    """
 55
 56    def is_numeric(s):
 57        try:
 58            float(s)
 59            return True
 60        except ValueError:
 61            return False
 62
 63    dirs = og_path.split(os.path.sep)
 64    equiv = root_directory
 65    for d in dirs:
 66        next_dir = os.path.join(equiv, d)
 67        if os.path.exists(next_dir):
 68            equiv = next_dir
 69        else:
 70            # If the directory doesn't exist, check if there's a directory with the same numerical value
 71            dir_value = d.split('=')[1]
 72            if is_numeric(dir_value):
 73                dir_value = float(dir_value)
 74                if os.path.exists(equiv):
 75                    existing_dirs = [entry.name for entry in os.scandir(equiv) if entry.is_dir()]
 76                    for existing_dir in existing_dirs:
 77                        existing_dir_value = existing_dir.split('=')[1]
 78                        if is_numeric(existing_dir_value) and float(existing_dir_value) == dir_value:
 79                            equiv = os.path.join(equiv, existing_dir)
 80                            break
 81                    # If there is no directory with the same numerical value 
 82                    # we just keep the directory as is and move on to the next one
 83                    else:
 84                        equiv = next_dir
 85                else:
 86                    # If the directory doesn't exist we just keep the directory as is and move on to the next one
 87                    equiv = next_dir
 88            # Otherwise we just keep the directory as is and move on to the next one
 89            else:
 90                equiv = next_dir
 91    return equiv
 92
 93def dict_to_strings(d: dict) -> List[str]:
 94    """ Converts a dictionary into a list of strings in the form of '--key=value'.
 95
 96    Args:
 97        - d (dict): Dictionary to be converted.
 98
 99    Returns:
100        - out (list of str): List of strings in the form of '--key=value'.
101
102    """
103
104    out = []
105    for key, value in d.items():
106        if key.startswith('--'):
107            out.append('{}={}'.format(key, value))
108        else:
109            out.append('--{}={}'.format(key, value))
110    return out
111
def find_csv_files(root_directory: Optional[str]='.') -> List[str]:
    """ Walks the root directory and every directory below it, collecting the paths of all files whose name ends in '.csv'.

    Args:
        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.

    Returns:
        - csv_files (list of str): List of strings containing the paths to all csv files found, in os.walk order.

    """
    return [
        os.path.join(folder, filename)
        for folder, _subfolders, filenames in os.walk(root_directory)
        for filename in filenames
        if filename.endswith('.csv')
    ]
128
def get_all_paths(dirs: List[str], root_directory: Optional[str]='.') -> List[str]:
    """ Finds the csv files whose path contains every one of the given directory names.

    All csv files below the root directory are collected, each path is split on the path separator,
    and a path is kept only if every entry of dirs is equal to one of its components.

    Args:
        - dirs (list of str): List of directory names we want returned paths to have in their path.
        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.

    Returns:
        - matches (list of str): List of strings containing the paths to all matching csv files found.

    """

    def _has_every_dir(csv_path):
        components = csv_path.split(os.path.sep)
        return all(wanted in components for wanted in dirs)

    return [csv_path for csv_path in find_csv_files(root_directory) if _has_every_dir(csv_path)]
def find_directory_path(strings: List[str], root_directory: Optional[str] = '.') -> str:
 5def find_directory_path(strings: List[str], root_directory: Optional[str]='.') -> Tuple[int, str]:
 6    """ Searches the root directory for a path of directories that matches the strings given in any order.
 7    If only a partial match is found, returns the deepest matching path.
 8    If no matches are found returns root_directory.
 9    Returns a stripped matching path of directories, ie. where we convert '--string=value' to '--string='.
10
11    Args:
12        - strings (list of str): List of strings to be matched in any order. Each string in list must be in the form '--string='.
13        - root_directory (string, optional): Path to the root directory to be searched, default is current working directory.
14    
15    Returns:
16        - max_depth (int): Depth of the deepest matching path.
17        - max_path (string): Path of the deepest matching path.
18    
19    """
20
21    def _find_directory_path(curr_strings, curr_root, depth, max_depth, max_path):
22        dir_list = [entry.name for entry in os.scandir(curr_root) if entry.is_dir()]
23        stripped_dir_list = [d.split('=')[0].strip() +"=" for d in dir_list]
24        stripped_dir_list = list(set(stripped_dir_list))
25        for string in curr_strings:
26            if string in stripped_dir_list:
27                dir_list = [d for d in dir_list if d.startswith(string)]
28                for d in dir_list:
29                    new_depth, new_path = _find_directory_path([s for s in curr_strings if s != string], os.path.join(curr_root, d), depth + 1, max_depth, max_path)
30                    if new_depth > max_depth:
31                        max_depth, max_path = new_depth, new_path
32        if depth > max_depth:
33            max_depth, max_path = depth, curr_root
34        return max_depth, max_path
35
36    max_depth, max_path = _find_directory_path(strings, root_directory, 0, -1, '')
37    if max_depth > 0:
38        max_path = max_path[len(root_directory):]
39        dirs = max_path[1:].split(os.path.sep)
40        dirs = [d.split('=')[0].strip() +"=" for d in dirs]
41        max_path = os.path.join(*dirs)
42        max_path = os.path.join(root_directory, max_path)
43    return max_path

Searches the root directory for a path of directories that matches the strings given in any order. If only a partial match is found, returns the deepest matching path. If no matches are found returns root_directory. Returns a stripped matching path of directories, ie. where we convert '--string=value' to '--string='.

Arguments:
  • strings (list of str): List of strings to be matched in any order. Each string in list must be in the form '--string='.
  • root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
Returns:
  • max_path (str): Path of the deepest matching path, with each directory stripped to the form '--string=' and root_directory at the beginning.
  • Note: the depth of the deepest match (max_depth) is only tracked internally during the search; it is not part of the return value.
def get_numeric_equiv(og_path: str, root_directory: Optional[str] = '.') -> str:
def get_numeric_equiv(og_path: str, root_directory: Optional[str]='.') -> str:
    """ Replaces directories in path with existing directories with the same numerical value.

    Walks og_path one directory at a time below root_directory. A directory that exists is kept as is.
    A directory '--string=value' that does not exist is replaced by an existing sibling directory
    with the same '--string' whose value is numerically equal (eg. '--lr=0.1' -> '--lr=0.10').
    Anything else (non-numeric value, no '=' in the name, no equivalent sibling, missing parent) is kept as is.

    Args:
        - og_path (str): Path we want to check against existing paths, must be a subdirectory of root_directory and each directory must have form '--string=value'.
        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.

    Returns:
        - equiv (str): Path with values changed to match existing directories if values are numerically equivalent, with root directory at beginning.

    """

    def _as_float(text):
        # None signals "not numeric"; it never compares equal to a float.
        try:
            return float(text)
        except ValueError:
            return None

    equiv = root_directory
    for d in og_path.split(os.path.sep):
        next_dir = os.path.join(equiv, d)
        if not os.path.exists(next_dir):
            # partition never raises, unlike split('=')[1] on a name without '='
            key, sep, value = d.partition('=')
            wanted = _as_float(value) if sep else None
            # isdir rather than exists: scandir cannot list a regular file
            if wanted is not None and os.path.isdir(equiv):
                with os.scandir(equiv) as entries:
                    siblings = [entry.name for entry in entries if entry.is_dir()]
                for sibling in siblings:
                    sib_key, sib_sep, sib_value = sibling.partition('=')
                    # Siblings without '=' (eg. unrelated folders) are skipped, and the parameter
                    # name must match so '--a=1' is never swapped for '--b=1.0'.
                    if sib_sep and sib_key.strip() == key.strip() and _as_float(sib_value) == wanted:
                        next_dir = os.path.join(equiv, sibling)
                        break
        equiv = next_dir
    return equiv

Replaces directories in path with existing directories with the same numerical value.

Arguments:
  • og_path (str): Path we want to check against existing paths, must be a subdirectory of root_directory and each directory must have form '--string=value'.
  • root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
Returns:
  • equiv (str): Path with values changed to match existing directories if values are numerically equivalent, with root directory at beginning.
def dict_to_strings(d: dict) -> List[str]:
 94def dict_to_strings(d: dict) -> List[str]:
 95    """ Converts a dictionary into a list of strings in the form of '--key=value'.
 96
 97    Args:
 98        - d (dict): Dictionary to be converted.
 99
100    Returns:
101        - out (list of str): List of strings in the form of '--key=value'.
102
103    """
104
105    out = []
106    for key, value in d.items():
107        if key.startswith('--'):
108            out.append('{}={}'.format(key, value))
109        else:
110            out.append('--{}={}'.format(key, value))
111    return out

Converts a dictionary into a list of strings in the form of '--key=value'.

Arguments:
  • d (dict): Dictionary to be converted.
Returns:
  • out (list of str): List of strings in the form of '--key=value'.
def find_csv_files(root_directory: Optional[str] = '.') -> List[str]:
def find_csv_files(root_directory: Optional[str]='.') -> List[str]:
    """ Walks the root directory and every directory below it, collecting the paths of all files whose name ends in '.csv'.

    Args:
        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.

    Returns:
        - csv_files (list of str): List of strings containing the paths to all csv files found, in os.walk order.

    """
    return [
        os.path.join(folder, filename)
        for folder, _subfolders, filenames in os.walk(root_directory)
        for filename in filenames
        if filename.endswith('.csv')
    ]

Recursively finds all csv files in all subdirectories of the root directory and returns their paths.

Arguments:
  • root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
Returns:
  • csv_files (list of str): List of strings containing the paths to all csv files found.
def get_all_paths(dirs: List[str], root_directory: Optional[str] = '.') -> List[str]:
def get_all_paths(dirs: List[str], root_directory: Optional[str]='.') -> List[str]:
    """ Finds the csv files whose path contains every one of the given directory names.

    All csv files below the root directory are collected, each path is split on the path separator,
    and a path is kept only if every entry of dirs is equal to one of its components.

    Args:
        - dirs (list of str): List of directory names we want returned paths to have in their path.
        - root_directory (str, optional): Path to the root directory to be searched, default is current working directory.

    Returns:
        - matches (list of str): List of strings containing the paths to all matching csv files found.

    """

    def _has_every_dir(csv_path):
        components = csv_path.split(os.path.sep)
        return all(wanted in components for wanted in dirs)

    return [csv_path for csv_path in find_csv_files(root_directory) if _has_every_dir(csv_path)]

Finds all csv files whose path contains every one of the given directory names.

Searches all subdirectories of the root directory for csv files and returns the paths of those that have, somewhere in their path, a component equal to each of the names given in dirs.

Arguments:
  • dirs (list of str): List of directory names we want returned paths to have in their path.
  • root_directory (str, optional): Path to the root directory to be searched, default is current working directory.
Returns:
  • matches (list of str): List of strings containing the paths to all csv files found.