Coverage for C:\src\imod-python\imod\util\path.py: 92%
93 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-08 13:27 +0200
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-08 13:27 +0200
1"""
2Conventional IDF filenames can be understood and constructed using
3:func:`imod.util.path.decompose` and :func:`imod.util.path.compose`. These are used
4automatically in :func:`imod.idf`.
5"""
7import datetime
8import pathlib
9import re
10import tempfile
11from typing import Any, Dict
13import cftime
14import numpy as np
16from imod.util.time import _compose_timestring, to_datetime
18Pattern = re.Pattern
def _custom_pattern_to_regex_pattern(pattern: str):
    """
    Translate iMOD Python's simplified ``{variable}`` pattern into a compiled
    regular expression.

    The pattern is lowercased first. Every ``{key}`` placeholder is then
    replaced by a named group that accepts word characters, dots and hyphens,
    so ``{name}_c{species}`` compiles to the same expression as:

        (?P<name>[\\w.-]+)_c(?P<species>[\\w.-]+)

    Text outside of the placeholders is inserted into the regular expression
    unchanged.
    """
    lowered = pattern.lower()
    # Map every placeholder name to its named-group replacement
    named_groups = {}
    for key in re.findall(r"{(.*?)}", lowered):
        named_groups[key] = f"(?P<{key}>[\\w.-]+)"
    # Substitute the placeholders via str.format and compile the result
    return re.compile(lowered.format(**named_groups))
37def _groupdict(stem: str, pattern: str | Pattern) -> Dict:
38 if pattern is not None:
39 if isinstance(pattern, Pattern):
40 d = pattern.match(stem).groupdict()
41 else:
42 re_pattern = _custom_pattern_to_regex_pattern(pattern)
43 # Use it to get the required variables
44 d = re_pattern.match(stem).groupdict()
45 else: # Default to "iMOD conventions": {name}_c{species}_{time}_l{layer}
46 has_layer = bool(re.search(r"_l\d+$", stem))
47 has_species = bool(
48 re.search(r"conc_c\d{1,3}_\d{8,14}", stem)
49 ) # We are strict in recognizing species
50 try: # try for time
51 base_pattern = r"(?P<name>[\w-]+)"
52 if has_species:
53 base_pattern += r"_c(?P<species>[0-9]+)"
54 base_pattern += r"_(?P<time>[0-9-]{6,})"
55 if has_layer:
56 base_pattern += r"_l(?P<layer>[0-9]+)"
57 re_pattern = re.compile(base_pattern)
58 d = re_pattern.match(stem).groupdict()
59 except AttributeError: # probably no time
60 base_pattern = r"(?P<name>[\w-]+)"
61 if has_species:
62 base_pattern += r"_c(?P<species>[0-9]+)"
63 if has_layer:
64 base_pattern += r"_l(?P<layer>[0-9]+)"
65 re_pattern = re.compile(base_pattern)
66 d = re_pattern.match(stem).groupdict()
67 return d
def decompose(path, pattern: str | Pattern | None = None) -> Dict[str, Any]:
    r"""
    Parse a path, returning a dict of the parts, following the iMOD conventions.

    Parameters
    ----------
    path : str or pathlib.Path
        Path to the file. Upper case is ignored.
    pattern : str, regex pattern, optional
        If the path is not made up of standard paths, and the default decompose
        does not produce the right result, specify the used pattern here. See
        the examples below.

    Returns
    -------
    d : dict
        Dictionary with name of variable and dimensions. Besides the parsed
        parts it contains ``"extension"`` (the file suffix), ``"directory"``
        (the parent of ``path``) and ``"dims"`` (the names of the parsed
        parts other than ``"name"``). ``"layer"`` and ``"species"`` are
        converted to ``int``, ``"time"`` is converted with
        :func:`imod.util.time.to_datetime`.

    Raises
    ------
    AttributeError
        If the pattern does not match the (lowered) file stem.

    Examples
    --------
    Decompose a path, relying on default conventions:

    >>> decompose("head_20010101_l1.idf")

    Do the same, by specifying a format string pattern, excluding extension:

    >>> decompose("head_20010101_l1.idf", pattern="{name}_{time}_l{layer}")

    This supports an arbitrary number of variables:

    >>> decompose("head_slr_20010101_l1.idf", pattern="{name}_{scenario}_{time}_l{layer}")

    The format string pattern will only work on tidy paths, where variables are
    separated by underscores. You can also pass a compiled regex pattern.
    Make sure to include the ``re.IGNORECASE`` flag since all paths are lowered.

    >>> import re
    >>> pattern = re.compile(r"(?P<name>[\w]+)L(?P<layer>[\d+]*)", re.IGNORECASE)
    >>> decompose("headL11", pattern=pattern)

    However, this requires constructing regular expressions, which is generally
    a fiddly process. The website https://regex101.com is a nice help.
    Alternatively, the most pragmatic solution may be to just rename your files.
    """
    path = pathlib.Path(path)
    # We'll ignore upper case
    stem = path.stem.lower()

    d = _groupdict(stem, pattern)
    dims = list(d.keys())
    # If name is not provided, generate one from other fields
    if "name" not in d:
        d["name"] = "_".join(d.values())
    else:
        dims.remove("name")

    # TODO: figure out what to with user specified variables
    # basically type inferencing via regex?
    # if purely numerical \d* -> int or float
    # if \d*\.\d* -> float
    # else: keep as string

    # String -> type conversion
    if "layer" in d:
        d["layer"] = int(d["layer"])
    if "species" in d:
        d["species"] = int(d["species"])
    if "time" in d:
        d["time"] = to_datetime(d["time"])
    if "steady-state" in d["name"]:
        # steady-state as time identifier isn't picked up by <time>[0-9] regex
        d["name"] = d["name"].replace("_steady-state", "")
        d["time"] = "steady-state"
        # A custom pattern may already have produced a "time" part; do not
        # list the dimension twice.
        if "time" not in dims:
            dims.append("time")

    d["extension"] = path.suffix
    d["directory"] = path.parent
    d["dims"] = dims
    return d
def compose(d, pattern=None) -> pathlib.Path:
    """
    From a dict of parts, construct a filename, following the iMOD
    conventions.

    Parameters
    ----------
    d : dict
        Parts of the filename. Without ``pattern`` the keys ``"name"`` and
        ``"extension"`` are required, and ``"species"``, ``"time"`` and
        ``"layer"`` are optional. If ``"directory"`` is present, the filename
        is placed in that directory. The dict is not modified.
    pattern : str, optional
        Format string that is filled in with the entries of ``d``, e.g.
        ``"{name}_l{layer}.idf"``. If omitted, the default
        ``{name}_c{species}_{time}_l{layer}{extension}`` layout is used,
        leaving out the parts that are not present in ``d``.

    Returns
    -------
    pathlib.Path
    """
    # Work on a shallow copy: the helper entries and the converted time below
    # must not leak into the caller's dict.
    parts = dict(d)

    if pattern is None:
        if "time" in parts:
            parts["timestr"] = "_{}".format(_compose_timestring(parts["time"]))
        else:
            parts["timestr"] = ""

        if "layer" in parts:
            parts["layerstr"] = "_l{}".format(int(parts["layer"]))
        else:
            parts["layerstr"] = ""

        if "species" in parts:
            parts["speciesstr"] = "_c{}".format(int(parts["species"]))
        else:
            parts["speciesstr"] = ""

        s = "{name}{speciesstr}{timestr}{layerstr}{extension}".format(**parts)
    else:
        if "time" in parts:
            time = parts["time"]
            # Change time to datetime.datetime. Any other value, including
            # the "steady-state" string, matches neither type check and is
            # passed to the pattern unchanged.
            if isinstance(time, np.datetime64):
                parts["time"] = time.astype("datetime64[us]").item()
            elif isinstance(time, cftime.datetime):
                # Take first six elements of timetuple and convert to datetime
                parts["time"] = datetime.datetime(*time.timetuple()[:6])
        s = pattern.format(**parts)

    if "directory" in parts:
        return pathlib.Path(parts["directory"]) / s
    return pathlib.Path(s)
def temporary_directory() -> pathlib.Path:
    """
    Return a unique path inside the system's temporary directory.

    The directory is created to reserve a unique name and is removed again
    before this function returns, so the returned path does not exist yet;
    callers are expected to create it (or write below it) themselves.

    Returns
    -------
    pathlib.Path
    """
    # Clean up explicitly instead of leaving it to the finalizer of a dropped
    # TemporaryDirectory object, which emits a ResourceWarning and whose
    # timing depends on garbage collection.
    with tempfile.TemporaryDirectory() as name:
        path = pathlib.Path(name)
    return path