Coverage for C:\src\imod-python\imod\formats\prj\prj.py: 92%
462 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-08 13:27 +0200
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-08 13:27 +0200
1"""
2Utilities for parsing a project file.
3"""
5import shlex
6from collections import defaultdict
7from datetime import datetime
8from itertools import chain
9from os import PathLike
10from pathlib import Path
11from typing import Any, Dict, List, Sequence, Tuple, Union
13import numpy as np
14import pandas as pd
15import xarray as xr
17import imod
19FilePath = Union[str, "PathLike[str]"]
22KEYS = {
23 "(bnd)": ("ibound",),
24 "(top)": ("top",),
25 "(bot)": ("bottom",),
26 "(thk)": ("thickness",),
27 "(khv)": ("kh",),
28 "(kva)": ("vertical_anisotropy",),
29 "(kdw)": ("transmissivity",),
30 "(kvv)": ("kv",),
31 "(vcw)": ("resistance",),
32 "(shd)": ("head",),
33 "(sto)": ("storage_coefficient",),
34 "(spy)": ("specific_yield",),
35 "(por)": ("porosity",),
36 "(ani)": ("factor", "angle"),
37 "(hfb)": ("gen",),
38 "(ibs)": (None),
39 "(pwt)": (None),
40 "(sft)": (None),
41 "(obs)": (None),
42 "(cbi)": (None),
43 "(sco)": (None),
44 "(dsp)": (None),
45 "(ics)": (None),
46 "(fcs)": (None),
47 "(ssc)": (None),
48 "(fod)": (None),
49 "(fos)": (None),
50 "(rct)": (None),
51 "(con)": (None),
52 "(pst)": (None),
53}
55DATE_KEYS = {
56 "(uzf)": (None,),
57 "(rch)": ("rate",),
58 "(evt)": ("rate", "surface", "depth"),
59 "(drn)": ("conductance", "elevation"),
60 "(olf)": ("elevation",),
61 "(riv)": ("conductance", "stage", "bottom_elevation", "infiltration_factor"),
62 "(isg)": ("isg",),
63 "(sfr)": ("isg",),
64 "(lak)": (None,),
65 "(wel)": ("ipf",),
66 "(mnw)": (None,),
67 "(ghb)": ("conductance", "head"),
68 "(chd)": ("head",),
69 "(fhb)": (None,),
70 "(fde)": (None,),
71 "(tvc)": (None,),
72}
74METASWAP_VARS = (
75 "boundary",
76 "landuse",
77 "rootzone_thickness",
78 "soil_physical_unit",
79 "meteo_station_number",
80 "surface_elevation",
81 "artificial_recharge",
82 "artifical_recharge_layer",
83 "artificial_recharge_capacity",
84 "wetted_area",
85 "urban_area",
86 "urban_ponding_depth",
87 "rural_ponding_depth",
88 "urban_runoff_resistance",
89 "rural_runoff_resistance",
90 "urban_runon_resistance",
91 "rural_runon_resistance",
92 "urban_infiltration_capacity",
93 "rural_infiltration_capacity",
94 "perched_water_table_level",
95 "soil_moisture_fraction",
96 "conductivitiy_factor",
97 "plot_number",
98 "steering_location",
99 "plot_drainage_level",
100 "plot_drainage_resistance",
101)
104class _LineIterator:
105 """
106 Like iter(lines), but we can go back and we check if we're finished.
107 """
109 def __init__(self, lines: List[List[str]]):
110 self.lines = lines
111 self.count = -1
112 self.length = len(lines)
114 def __iter__(self):
115 return self
117 def __next__(self) -> List[str]:
118 if self.finished:
119 raise StopIteration
120 self.count += 1
121 return self.lines[self.count]
123 def back(self) -> None:
124 self.count = max(self.count - 1, -1)
126 @property
127 def finished(self) -> bool:
128 return (self.count + 1) >= self.length
131def _tokenize(line: str) -> List[str]:
132 """
133 A value separator in Fortran list-directed input is:
135 * A comma if period decimal edit mode is POINT.
136 * One or more contiguous spaces (blanks); no tabs.
138 Other remarks:
140 * Values, except for character strings, cannot contain blanks.
141 * Strings may be unquoted if they do not start with a digit and no value
142 separators.
143 * Character strings can be quoted strings, using pairs of quotes ("), pairs
144 of apostrophes (').
145 * A quote or apostrophe must be preceded by a value separator to initite a
146 quoted string.
147 * An empty entry consists of two consecutive commas (or semicolons).
149 For the use here (parsing IMOD's project files), we ignore:
151 * A semicolon value separator if period decimal edit mode is COMMA.
152 * Complex constants given as two real constants separated by a comma and
153 enclosed in parentheses.
154 * Repetition counts: 4*(3.,2.) 2*, 4*'hello'
156 Furthermore, we do not expect commas inside of the project file entries,
157 since we expect:
159 * Package names: unquoted character strings.
160 * File paths: will not contain commas, no single apostrophe, nor a single
161 quote symbol, may contain whitespace if quoted. * Integers for counts and
162 settings.
163 * Floats for addition and multiplication values.
164 * Simple character strings for period names (summer, winter). These
165 technically could contain commas if quoted, which is very unlikely.
166 * No quotes or apostrophes are escaped.
168 With these assumptions, we can limit complexity considerably (see the
169 PyLiDiRe link for a complete implementation):
171 * First we split by comma (we don't expected commas in quote strings).
172 * Next we split by whitespace, unless quoted.
174 We can expect both single and double quotes, even within a single line:
175 shlex.split() handles this. Note that additional entries are likely
176 allowed, as the Fortran implementation only reads what is necessary,
177 then stops parsing.
179 See also:
180 * https://stackoverflow.com/questions/36165050/python-equivalent-of-fortran-list-directed-input
181 * https://gitlab.com/everythingfunctional/PyLiDiRe
182 * https://docs.oracle.com/cd/E19957-01/805-4939/6j4m0vnc5/index.html
183 * The Fortran 2003 Handbook
185 Examples
186 --------
188 Raise ValueError, due to missing closing quotation. (Can be enabled
189 shlex.split(s, posix=False)):
191 >> _tokenize("That's life")
193 >> _tokenize("That 's life'")
194 >> ["That", "s life"]
196 >> _tokenize("That,'s life'")
197 >> ["That", "s life"]
198 """
199 values = [v.strip().replace("\\", "/") for v in line.split(",")]
200 tokens = list(chain.from_iterable(shlex.split(v) for v in values))
201 return tokens
204def _wrap_error_message(
205 exception: Exception, description: str, lines: _LineIterator
206) -> None:
207 lines.back()
208 content = next(lines)
209 number = lines.count + 1
210 raise type(exception)(
211 f"{exception}\n"
212 f"Failed to parse {description} for line {number} with content:\n{content}"
213 )
216def _parse_blockheader(lines: _LineIterator) -> Tuple[int, str, str]:
217 try:
218 no_result = None, None, None
219 line = next(lines)
221 # Skip if it's an empty line.
222 if len(line) == 0:
223 return no_result
225 first = line[0].lower()
226 if first in ("periods", "species"):
227 return 1, first, None
228 # The line must contain atleast nper, key, active.
229 elif len(line) >= 3:
230 n = int(first)
231 key = line[1].lower()
232 active = line[2]
233 return n, key, active
234 # It's a comment or something.
235 else:
236 return no_result
237 except Exception as e:
238 _wrap_error_message(e, "block header", lines)
241def _parse_time(lines: _LineIterator) -> str:
242 try:
243 line = next(lines)
244 date = line[0].lower()
245 if len(line) > 1:
246 time = line[1]
247 return f"{date} {time}"
248 else:
249 return date
250 except Exception as e:
251 _wrap_error_message(e, "date time", lines)
254def _parse_blockline(lines: _LineIterator, time: str = None) -> Dict[str, Any]:
255 try:
256 line = next(lines)
257 content = {
258 "active": bool(int(line[0])),
259 "is_constant": int(line[1]),
260 "layer": int(line[2]),
261 "factor": float(line[3]),
262 "addition": float(line[4]),
263 "constant": float(line[5]),
264 }
265 if content["is_constant"] == 2:
266 content["path"] = Path(line[6]).resolve()
267 if time is not None:
268 content["time"] = time
269 return content
270 except Exception as e:
271 _wrap_error_message(e, "entries", lines)
274def _parse_nsub_nsystem(lines: _LineIterator) -> Tuple[int, int]:
275 try:
276 line = next(lines)
277 n_entry = int(line[0])
278 n_system = int(line[1])
279 return n_entry, n_system
280 except Exception as e:
281 _wrap_error_message(e, "number of sub-entries and number of systems", lines)
284def _parse_notimeblock(
285 lines: _LineIterator,
286 fields: List[str],
287) -> Dict[str, Any]:
288 n_entry, n_system = _parse_nsub_nsystem(lines)
290 if len(fields) != n_entry:
291 raise ValueError(
292 f"Expected NSUB entry of {len(fields)} for {fields}, read: {n_entry}"
293 )
294 content = {
295 field: [_parse_blockline(lines) for _ in range(n_system)] for field in fields
296 }
297 content["n_system"] = n_system
298 return content
301def _parse_capblock(
302 lines: _LineIterator,
303) -> Dict[str, Any]:
304 fields = METASWAP_VARS
305 n_entry, n_system = _parse_nsub_nsystem(lines)
307 if n_entry == 21:
308 # Remove layer entry.
309 fields = list(fields[:22]).pop(8)
310 elif n_entry == 22:
311 fields = fields[:22]
312 elif n_entry == 26:
313 pass
314 else:
315 raise ValueError(
316 f"Expected NSUB entry of 21, 22, or 26 for {fields}, read: {n_entry}"
317 )
319 content = {
320 field: [_parse_blockline(lines) for _ in range(n_system)] for field in fields
321 }
322 content["n_system"] = n_system
323 return content
326def _parse_extrablock(lines: _LineIterator, n: int) -> Dict[str, List[str]]:
327 """Parse the MetaSWAP "extra files" block"""
328 return {"paths": [next(lines) for _ in range(n)]}
331def _parse_timeblock(
332 lines: List[str],
333 fields: List[str],
334 n: int,
335) -> Dict[str, Any]:
336 n_fields = len(fields)
337 content = defaultdict(list)
338 for _ in range(n):
339 time = _parse_time(lines)
340 content["time"].append(time)
341 n_entry, n_system = _parse_nsub_nsystem(lines)
343 if n_fields != n_entry:
344 raise ValueError(
345 f"Expected NSUB entry of {n_fields} for {fields}, read: {n_entry}"
346 )
348 for field in fields:
349 content[field].extend(
350 [_parse_blockline(lines, time) for _ in range(n_system)]
351 )
353 content["n_system"] = n_system
354 return content
357def _parse_pcgblock(lines: _LineIterator) -> Dict[str, Any]:
358 try:
359 line = next(lines)
361 # TODO: which are optional? How many to expect?
362 # Check for an empty line to terminate the block?
363 types = {
364 "mxiter": int,
365 "iter1": int,
366 "hclose": float,
367 "rclose": float,
368 "relax": float,
369 "npcond": int,
370 "iprpcg": int,
371 "mutpcg": int,
372 "damppcg": float,
373 "damppcgt": float,
374 "iqerror": int,
375 "qerror": float,
376 }
378 if len(line) == 12:
379 line_iterator = iter(line)
380 content = {
381 k: valuetype(next(line_iterator)) for k, valuetype in types.items()
382 }
383 elif any("=" in s for s in line):
384 pcglines = [line] + [next(lines) for _ in range(11)]
385 content = {}
386 for line in pcglines:
387 # undo separation, partition on equality sign instead.
388 line = "".join(line)
389 key, _, value = line.lower().partition("=")
390 value = types[key](value)
391 content[key] = value
392 else:
393 raise ValueError(
394 f"Expected 12 KEY = VALUE pairs, or 12 values. Found {len(line)}"
395 )
397 return content
398 except Exception as e:
399 _wrap_error_message(e, "PCG entry", lines)
402def _parse_periodsblock(lines: _LineIterator) -> Dict[str, str]:
403 try:
404 periods = {}
405 while not lines.finished:
406 line = next(lines)
407 # Stop if we encounter an empty line.
408 if len(line) == 0:
409 break
410 # Read the alias
411 alias = line[0]
412 # Now read the time associated with it.
413 start = _parse_time(lines)
414 periods[alias] = start
415 return periods
416 except Exception as e:
417 _wrap_error_message(e, "periods data block", lines)
420def _parse_speciesblock(lines: _LineIterator):
421 try:
422 species = {}
423 while not lines.finished:
424 line = next(lines)
425 # Stop if we encounter an empty line.
426 if len(line) == 0:
427 break
428 name, nr = line
429 nr = int(nr)
430 species[nr] = name
431 return species
432 except Exception as e:
433 _wrap_error_message(e, "species entry", lines)
436def _parse_block(lines: _LineIterator, content: Dict[str, Any]) -> None:
437 """
438 Mutates content dict.
439 """
440 n = key = active = None
442 # A project file may contain any number of lines outside of a "topic"
443 # block. _parse_blockheader will return triple None in that case.
444 while key is None and not lines.finished:
445 n, key, active = _parse_blockheader(lines)
447 try:
448 if key in KEYS:
449 if n != 1:
450 raise ValueError(f"Expected N=1 for {key}, read: {n}")
451 fields = KEYS[key]
452 blockcontent = _parse_notimeblock(lines, fields)
453 elif key in DATE_KEYS:
454 fields = DATE_KEYS[key]
455 blockcontent = _parse_timeblock(lines, fields, n)
456 elif key == "(cap)":
457 blockcontent = _parse_capblock(lines)
458 elif key == "(pcg)":
459 blockcontent = _parse_pcgblock(lines)
460 elif key == "periods":
461 blockcontent = _parse_periodsblock(lines)
462 elif key == "species":
463 blockcontent = _parse_speciesblock(lines)
464 elif key == "extra":
465 blockcontent = _parse_extrablock(lines, n)
466 else:
467 other = ("(pcg)", "(gcg)", "(vdf)")
468 options = tuple(KEYS.keys()) + tuple(DATE_KEYS.keys()) + other
469 lines.back()
470 line = next(lines)
471 number = lines.count + 1
472 raise ValueError(
473 f"Failed to recognize header keyword: {key}. Expected one of keywords {options}"
474 f"\nErrored in line {number} with entries:\n{line}"
475 )
477 except Exception as e:
478 raise type(e)(f"{e}\nError occurred for keyword: {key}")
480 if blockcontent is not None and active is not None:
481 blockcontent["active"] = active
483 content[key] = blockcontent
484 return
487def _process_package_entry(entry: Dict):
488 """
489 The iMOD project file supports constants in lieu of IDFs.
490 """
491 coords = {"layer": entry["layer"]}
492 dims = ("layer",)
494 if "path" not in entry:
495 path = None
496 header = {"coords": coords}
497 value = entry["constant"]
498 else:
499 path = entry["path"]
500 header = imod.idf.header(path, pattern="{name}")
501 value = None
503 header["dims"] = dims
504 return path, header, value
507def _merge_coords(headers: List[Dict[str, Any]]) -> Dict[str, np.ndarray]:
508 coords = defaultdict(list)
509 for header in headers:
510 for key, value in header["coords"].items():
511 coords[key].append(value)
512 return {k: np.unique(coords[k]) for k in coords}
515def _create_datarray_from_paths(paths: List[str], headers: List[Dict[str, Any]]):
516 da = imod.formats.array_io.reading._load(
517 paths, use_cftime=False, _read=imod.idf._read, headers=headers
518 )
519 return da
522def _create_dataarray_from_values(values: List[float], headers: List[Dict[str, Any]]):
523 coords = _merge_coords(headers)
524 firstdims = headers[0]["dims"]
525 shape = [len(coord) for coord in coords.values()]
526 da = xr.DataArray(np.reshape(values, shape), dims=firstdims, coords=coords)
527 return da
530def _create_dataarray(
531 paths: List[str], headers: List[Dict[str, Any]], values: List[float]
532) -> xr.DataArray:
533 """
534 Create a DataArray from a list of IDF paths, or from constant values.
535 """
536 values_valid = []
537 paths_valid = []
538 headers_paths = []
539 headers_values = []
540 for path, header, value in zip(paths, headers, values):
541 if path is None:
542 headers_values.append(header)
543 values_valid.append(value)
544 else:
545 headers_paths.append(header)
546 paths_valid.append(path)
548 if paths_valid and values_valid:
549 dap = _create_datarray_from_paths(paths_valid, headers_paths)
550 dav = _create_dataarray_from_values(values_valid, headers_values)
551 dap.name = "tmp"
552 dav.name = "tmp"
553 da = xr.merge((dap, dav), join="outer")["tmp"]
554 elif paths_valid:
555 da = _create_datarray_from_paths(paths_valid, headers_paths)
556 elif values_valid:
557 da = _create_dataarray_from_values(values_valid, headers_values)
559 return da
562def _open_package_idf(
563 block_content: Dict[str, Any], variables: Sequence[str]
564) -> List[xr.DataArray]:
565 das = {}
566 for variable in variables:
567 variable_content = block_content[variable]
568 paths = []
569 headers = []
570 values = []
571 for entry in variable_content:
572 path, header, value = _process_package_entry(entry)
573 header["name"] = variable
574 header["dims"] = ["layer"]
575 header["layer"] = entry["layer"]
576 paths.append(path)
577 headers.append(header)
578 values.append(value)
580 das[variable] = _create_dataarray(paths, headers, values)
582 return [das]
585def _process_time(time: str, yearfirst: bool = True):
586 if time == "steady-state":
587 time = None
588 else:
589 if yearfirst:
590 if len(time) == 19:
591 time = datetime.strptime(time, "%Y-%m-%d %H:%M:%S")
592 elif len(time) == 10:
593 time = datetime.strptime(time, "%Y-%m-%d")
594 else:
595 raise ValueError(
596 f"time data {time} does not match format "
597 '"%Y-%m-%d %H:%M:%S" or "%Y-%m-%d"'
598 )
599 else:
600 if len(time) == 19:
601 time = datetime.strptime(time, "%d-%m-%Y %H:%M:%S")
602 elif len(time) == 10:
603 time = datetime.strptime(time, "%d-%m-%Y")
604 else:
605 raise ValueError(
606 f"time data {time} does not match format "
607 '"%d-%m-%Y %H:%M:%S" or "%d-%m-%Y"'
608 )
609 return time
612def _process_boundary_condition_entry(entry: Dict, periods: Dict[str, datetime]):
613 """
614 The iMOD project file supports constants in lieu of IDFs.
616 Also process repeated stress periods (on a yearly basis): substitute the
617 original date here.
618 """
619 coords = {}
620 timestring = entry["time"]
622 # Resolve repeating periods first:
623 time = periods.get(timestring)
624 if time is not None:
625 repeat = time
626 else:
627 # this resolves e.g. "steady-state"
628 time = _process_time(timestring)
629 repeat = None
631 if time is None:
632 dims = ()
633 else:
634 dims = ("time",)
635 coords["time"] = time
637 # 0 signifies that the layer must be determined on the basis of
638 # bottom elevation and stage.
639 layer = entry["layer"]
640 if layer <= 0:
641 layer is None
642 else:
643 coords["layer"] = layer
644 dims = dims + ("layer",)
646 if "path" not in entry:
647 path = None
648 header = {"coords": coords}
649 value = entry["constant"]
650 else:
651 path = entry["path"]
652 header = imod.idf.header(path, pattern="{name}")
653 value = None
655 header["dims"] = dims
656 if layer is not None:
657 header["layer"] = layer
658 if time is not None:
659 header["time"] = time
661 return path, header, value, repeat
664def _open_boundary_condition_idf(
665 block_content, variables, periods: Dict[str, datetime]
666) -> Tuple[List[Dict[str, xr.DataArray]], List[datetime]]:
667 """
668 Read the variables specified from block_content.
669 """
670 n_system = block_content["n_system"]
671 n_time = len(block_content["time"])
672 n_total = n_system * n_time
674 das = [{} for _ in range(n_system)]
675 for variable in variables:
676 variable_content = block_content[variable]
678 n = len(variable_content)
679 if n != n_total:
680 raise ValueError(
681 f"Expected n_time * n_system = {n_time} * {n_system} = "
682 f"{n_total} entries for variable {variable}. Received: {n}"
683 )
685 # Group the paths and headers by system.
686 system_paths = defaultdict(list)
687 system_headers = defaultdict(list)
688 system_values = defaultdict(list)
689 all_repeats = set()
690 for i, entry in enumerate(variable_content):
691 path, header, value, repeat = _process_boundary_condition_entry(
692 entry, periods
693 )
694 header["name"] = variable
695 key = i % n_system
696 system_paths[key].append(path)
697 system_headers[key].append(header)
698 system_values[key].append(value)
699 if repeat:
700 all_repeats.add(repeat)
702 # Concat one system at a time.
703 for i, (paths, headers, values) in enumerate(
704 zip(system_paths.values(), system_headers.values(), system_values.values())
705 ):
706 das[i][variable] = _create_dataarray(paths, headers, values)
708 repeats = sorted(all_repeats)
709 return das, repeats
712def _read_package_gen(
713 block_content: Dict[str, Any], has_topbot: bool
714) -> List[Dict[str, Any]]:
715 out = []
716 for entry in block_content["gen"]:
717 gdf = imod.gen.read(entry["path"])
718 if has_topbot:
719 gdf["resistance"] = entry["factor"] * entry["addition"]
720 else:
721 gdf["multiplier"] = entry["factor"] * entry["addition"]
722 d = {
723 "geodataframe": gdf,
724 "layer": entry["layer"],
725 }
726 out.append(d)
727 return out
730def _read_package_ipf(
731 block_content: Dict[str, Any], periods: Dict[str, datetime]
732) -> Tuple[List[Dict[str, Any]], List[datetime]]:
733 out = []
734 repeats = []
735 for entry in block_content["ipf"]:
736 timestring = entry["time"]
737 layer = entry["layer"]
738 time = periods.get(timestring)
739 if time is None:
740 time = _process_time(timestring)
741 else:
742 repeats.append(time)
744 # Ensure the columns are identifiable.
745 path = Path(entry["path"])
746 ipf_df, indexcol, ext = imod.ipf._read_ipf(path)
747 if indexcol == 0:
748 # No associated files
749 columns = ("x", "y", "rate")
750 if layer <= 0:
751 df = ipf_df.iloc[:, :5]
752 columns = columns + ("top", "bottom")
753 else:
754 df = ipf_df.iloc[:, :3]
755 df.columns = columns
756 else:
757 dfs = []
758 for row in ipf_df.itertuples():
759 filename = row[indexcol]
760 path_assoc = path.parent.joinpath(f"{filename}.{ext}")
761 df_assoc = imod.ipf.read_associated(path_assoc).iloc[:, :2]
762 df_assoc.columns = ["time", "rate"]
763 df_assoc["x"] = row[1]
764 df_assoc["y"] = row[2]
765 df_assoc["id"] = path_assoc.stem
766 if layer <= 0:
767 df_assoc["top"] = row[4]
768 df_assoc["bottom"] = row[5]
769 dfs.append(df_assoc)
770 df = pd.concat(dfs, ignore_index=True, sort=False)
772 d = {
773 "dataframe": df,
774 "layer": layer,
775 "time": time,
776 }
777 out.append(d)
779 repeats = sorted(repeats)
780 return out, repeats
783def read_projectfile(path: FilePath) -> Dict[str, Any]:
784 """
785 Read an iMOD project file into a collection of nested dictionaries.
787 The top-level keys are the "topic" entries such "bnd" or "riv" in the
788 project file. An example structure of the dictionaries is visualized below:
790 .. code-block::
792 content
793 ├── bnd
794 │ ├── active: bool
795 │ └── ibound: list of dictionaries for each layer
796 ├── riv
797 │ ├── active: bool
798 │ ├── conductance: list of dictionaries for each time and layer.
799 │ ├── stage: idem.
800 │ ├── bottom_elevation: idem.
801 │ └── infiltration_factor: idem.
802 etc.
804 Time and layer are flattened into a single list and time is included in
805 every dictionary:
807 .. code-block::
809 stage
810 ├── 0 # First entry in list
811 │ ├── active: bool
812 │ ├── is_constant: bool
813 │ ├── layer: int
814 │ ├── factor: float
815 │ ├── addition: float
816 │ ├── constant: float
817 │ ├── path: str
818 │ └── time: str
819 │
820 ├── 1 # Second entry in list
821 │ ├── etc.
822 etc.
825 Parameters
826 ----------
827 path: str or Path
829 Returns
830 -------
831 content: Dict[str, Any]
832 """
833 # Force to Path
834 path = Path(path)
836 with open(path) as f:
837 lines = f.readlines()
839 tokenized = []
840 for i, line in enumerate(lines):
841 try:
842 tokenized.append(_tokenize(line))
843 except Exception as e:
844 raise type(e)(f"{e}\nError occurred in line {i}")
846 lines = _LineIterator(tokenized)
847 content = {}
848 wdir = path.parent
849 # Change dir temporarely to projectfile dir to resolve relative paths
850 with imod.util.cd(wdir):
851 while not lines.finished:
852 _parse_block(lines, content)
854 return content
857def open_projectfile_data(path: FilePath) -> Dict[str, Any]:
858 """
859 Read the contents of an iMOD project file and read/open the data present in
860 it:
862 * IDF data is lazily loaded into xarray.DataArrays.
863 * GEN data is eagerly loaded into geopandas.GeoDataFrames
864 * IPF data is eagerly loaded into pandas.DataFrames
865 * Non-file based entries (such as the PCG settings) are kept as a dictionary.
867 When multiple systems are present, they are numbered starting from one, e.g.:
869 * drn-1
870 * drn-2
872 Xarray requires valid dates for the time coordinate. Aliases such as
873 "summer" and "winter" that are associated with dates in the project file
874 Periods block cannot be used in the time coordinate. Hence, this function
875 will instead insert the dates associated with the aliases, with the year
876 replaced by 1899; as the iMOD calendar starts at 1900, this ensures that
877 the repeats are always first and that no date collisions will occur.
879 Parameters
880 ----------
881 path: pathlib.Path or str.
883 Returns
884 -------
885 data: Dict[str, Any]
886 Keys are the iMOD project file "topics", without parentheses.
887 """
888 content = read_projectfile(path)
889 periods_block = content.pop("periods", None)
890 if periods_block is None:
891 periods = {}
892 else:
893 # Set the year of a repeat date to 1899: this ensures it falls outside
894 # of the iMOD calendar. Collisions are then always avoided.
895 periods = {
896 key: _process_time(time, yearfirst=False).replace(year=1899)
897 for key, time in periods_block.items()
898 }
900 # Pop species block, at the moment we do not do much with,
901 # since most regional models are without solute transport
902 content.pop("species", None)
904 has_topbot = "(top)" in content and "(bot)" in content
905 prj_data = {}
906 repeat_stress = {}
907 for key, block_content in content.items():
908 repeats = None
909 try:
910 if key == "(hfb)":
911 data = _read_package_gen(block_content, has_topbot)
912 elif key == "(wel)":
913 data, repeats = _read_package_ipf(block_content, periods)
914 elif key == "(cap)":
915 variables = set(METASWAP_VARS).intersection(block_content.keys())
916 data = _open_package_idf(block_content, variables)
917 elif key in ("extra", "(pcg)"):
918 data = [block_content]
919 elif key in KEYS:
920 variables = KEYS[key]
921 data = _open_package_idf(block_content, variables)
922 elif key in DATE_KEYS:
923 variables = DATE_KEYS[key]
924 data, repeats = _open_boundary_condition_idf(
925 block_content, variables, periods
926 )
927 else:
928 raise KeyError(f"Unsupported key: '{key}'")
929 except Exception as e:
930 raise type(e)(f"{e}. Errored while opening/reading data entries for: {key}")
932 strippedkey = key.strip("(").strip(")")
933 if len(data) > 1:
934 for i, da in enumerate(data):
935 numbered_key = f"{strippedkey}-{i + 1}"
936 prj_data[numbered_key] = da
937 repeat_stress[numbered_key] = repeats
938 else:
939 prj_data[strippedkey] = data[0]
940 repeat_stress[strippedkey] = repeats
942 repeat_stress = {k: v for k, v in repeat_stress.items() if v}
943 return prj_data, repeat_stress
946def read_timfile(path: FilePath) -> List[Dict]:
947 def parsetime(time: str) -> datetime:
948 # Check for steady-state:
949 if time == "00000000000000":
950 return None
951 return datetime.strptime(time, "%Y%m%d%H%M%S")
953 with open(path, "r") as f:
954 lines = f.readlines()
956 # A line contains 2, 3, or 4 values:
957 # time, isave, nstp, tmult
958 casters = {
959 "time": parsetime,
960 "save": lambda x: bool(int(x)),
961 "n_timestep": int,
962 "timestep_multiplier": float,
963 }
964 content = []
965 for line in lines:
966 stripped = line.strip()
967 if stripped == "":
968 continue
969 parts = stripped.split(",")
970 entry = {k: cast(s.strip()) for s, (k, cast) in zip(parts, casters.items())}
971 content.append(entry)
973 return content