Coverage for C:\src\imod-python\imod\wq\pkgbase.py: 14%

264 statements  

coverage.py v7.4.4, created at 2024-04-08 10:26 +0200

import abc
import pathlib
import warnings

import jinja2
import numpy as np
import xarray as xr

import imod
from imod.util.time import forcing_starts_ends, to_datetime_internal


class Package(abc.ABC):
    """
    Base package for the different SEAWAT packages.
    Every package contains a ``_pkg_id`` for identification.
    Used to check for duplicate entries, or to group multiple systems together
    (riv, ghb, drn).

    The ``_template`` attribute is the template for a section of the runfile.
    This is filled in based on the metadata from the DataArrays that are within
    the Package.

    The ``_keywords`` attribute is a dictionary that's used to replace readable
    keyword arguments by the integer arguments that SEAWAT expects.
    """

    @classmethod
    def from_file(cls, path, **kwargs):
        """
        Loads an imod-wq package from a file (currently only netcdf and zarr are supported).
        Note that it is expected that this file was saved with imod.wq.Package.save(),
        as the checks upon package initialization are not done again!

        Parameters
        ----------
        path : str, pathlib.Path
            Path to the file.
        **kwargs : keyword arguments
            Arbitrary keyword arguments forwarded to ``xarray.open_dataset()`` or
            ``xarray.open_zarr()``.
            Refer to the examples.

        Returns
        -------
        package : imod.wq.Package
            Returns a package with data loaded from file.

        Examples
        --------

        To load a package from a file, e.g. a River package:

        >>> river = imod.wq.River.from_file("river.nc")

        For large datasets, you likely want to process them in chunks. You can
        forward keyword arguments to ``xarray.open_dataset()`` or
        ``xarray.open_zarr()``:

        >>> river = imod.wq.River.from_file("river.nc", chunks={"time": 1})

        Refer to the xarray documentation for the possible keyword arguments.
        """

        path = pathlib.Path(path)

        # See https://stackoverflow.com/a/2169191
        # We expect the data in the netcdf has been saved as a package,
        # thus the checks run by __init__ and __setitem__ do not have
        # to be called again.
        return_cls = cls.__new__(cls)

        if path.suffix in (".zip", ".zarr"):
            # TODO: seems like a bug? Remove str() call if fixed in xarray/zarr
            return_cls.dataset = xr.open_zarr(str(path), **kwargs)
        else:
            return_cls.dataset = xr.open_dataset(path, **kwargs)

        return return_cls

    def __init__(self):
        self.dataset = xr.Dataset()

    def __setitem__(self, key, value):
        if isinstance(value, xr.DataArray):
            if "z" in value.dims:
                if "layer" not in value.coords:
                    raise ValueError(
                        'Coordinate "layer" must be present in DataArrays with a "z" dimension'
                    )
                value = value.swap_dims({"z": "layer"})
            if "layer" in value.dims:
                value = value.dropna(dim="layer", how="all")
        self.dataset.__setitem__(key, value)

    def __getitem__(self, key):
        return self.dataset.__getitem__(key)
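
    # Illustrative note, not part of the original source: __setitem__ accepts
    # DataArrays with a "z" dimension as long as a "layer" coordinate is present,
    # swaps "z" for "layer", and drops layers that are entirely NaN. A hypothetical
    # sketch of the effect, assuming ``pkg`` is some Package instance:
    #
    # >>> da = xr.DataArray(
    # ...     [1.0, float("nan")],
    # ...     dims=("z",),
    # ...     coords={"z": [-5.0, -15.0], "layer": ("z", [1, 2])},
    # ... )
    # >>> pkg["stage"] = da  # stored with dims ("layer",); all-NaN layer 2 dropped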

    def isel(self):
        raise NotImplementedError(
            f"Selection on packages not yet supported. "
            f"To make a selection on the xr.Dataset, call {self._pkg_id}.dataset.isel instead. "
            f"You can create a new package with a selection by calling {__class__.__name__}(**{self._pkg_id}.dataset.isel(**selection))"
        )

    def sel(self):
        raise NotImplementedError(
            f"Selection on packages not yet supported. "
            f"To make a selection on the xr.Dataset, call {self._pkg_id}.dataset.sel instead. "
            f"You can create a new package with a selection by calling {__class__.__name__}(**{self._pkg_id}.dataset.sel(**selection))"
        )

    def _replace_keyword(self, d, key):
        """
        Method to replace a readable keyword value by the corresponding cryptic
        integer value that SEAWAT demands.

        Dict ``d`` is updated in place.

        Parameters
        ----------
        d : dict
            Updated in place.
        key : str
            key of value in dict ``d`` to replace.
        """
        keyword = d[key][()]  # Get value from 0d np.array
        value = self._keywords[key][keyword]
        d[key] = value
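
    # Illustrative note, not part of the original source: assuming a package class
    # defined a hypothetical mapping such as
    #   _keywords = {"mixelm": {"tvd": -1, "upwind": 0}}
    # then _replace_keyword(d, "mixelm") would turn d = {"mixelm": np.array("tvd")}
    # into d = {"mixelm": -1} before the runfile template is formatted.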

    def _render(self, *args, **kwargs):
        """
        Rendering method for simple keyword packages (vdf, pcg).

        Returns
        -------
        rendered : str
            The rendered runfile part for a single boundary condition system.
        """
        d = {k: v.values for k, v in self.dataset.data_vars.items()}  # pylint: disable=no-member
        if hasattr(self, "_keywords"):
            for key in self._keywords.keys():
                self._replace_keyword(d, key)
        return self._template.format(**d)

    def _compose_path(self, d, pattern=None):
        # d : dict
        # pattern : string or re.pattern
        return imod.util.path.compose(d, pattern).as_posix()

    def _compress_idflayers(self, values, range_path):
        """
        Compresses explicit layers into ranges.

        Saves on the number of lines, makes the runfile smaller, and imod-wq faster.
        """
        layers = np.array(list(values.keys()))
        if len(layers) == 1:
            return values

        breaks = np.argwhere(np.diff(layers) != 1)
        if breaks.size == 0:
            start = layers[0]
            end = layers[-1]
            return {f"{start}:{end}": range_path}
        starts = [0] + list(breaks[:, 0] + 1)
        ends = list(breaks[:, 0]) + [len(layers) - 1]

        compressed = {}
        for start_index, end_index in zip(starts, ends):
            start = layers[start_index]
            end = layers[end_index]
            if start == end:
                compressed[f"{start}"] = values[start]
            else:
                compressed[f"{start}:{end}"] = range_path

        return compressed
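
    # Illustrative note, not part of the original source: with per-layer paths for
    # layers 1, 2, 3 and 6, the consecutive run is collapsed onto range_path, e.g.
    #   _compress_idflayers(
    #       {1: "cond_l1.idf", 2: "cond_l2.idf", 3: "cond_l3.idf", 6: "cond_l6.idf"},
    #       "cond_l:.idf",
    #   )
    #   -> {"1:3": "cond_l:.idf", "6": "cond_l6.idf"}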

    def _compose_values_layer(
        self, varname, directory, nlayer, time=None, da=None, compress=True
    ):
        """
        Composes paths to files, or gets the appropriate scalar value for
        a single variable in a dataset.

        Parameters
        ----------
        varname : str
            variable name of the DataArray
        directory : str
            Path to working directory, where files will be written.
            Necessary to generate the paths for the runfile.
        time : datetime like, optional
            Time corresponding to the value.
        da : xr.DataArray, optional
            In some cases fetching the DataArray by varname is not desired.
            It can be passed directly via this optional argument.
        compress : boolean
            Whether or not to compress the layers using the imod-wq macros.
            Should be disabled for time-dependent input.

        Returns
        -------
        values : dict
            Dictionary containing the {layer number: path to file}.
            Alternatively: {layer number: scalar value}. The layer number may be
            a wildcard (e.g. '?').
        """
        pattern = "{name}"

        values = {}
        if da is None:
            da = self.dataset[varname]

        d = {"directory": directory, "name": varname, "extension": ".idf"}

        if "species" in da.coords:
            d["species"] = da.coords["species"].values
            pattern += "_c{species}"

        if time is not None:
            d["time"] = time
            pattern += "_{time:%Y%m%d%H%M%S}"

        # Scalar value or not?
        # If it's a scalar value we can immediately write,
        # otherwise, we have to write a file path
        if "y" not in da.coords and "x" not in da.coords:
            idf = False
        else:
            idf = True

        if "layer" not in da.coords:
            if idf:
                # Special case concentration
                # Using "?" results in too many sinks and sources according to imod-wq.
                pattern += "{extension}"
                if hasattr(self, "_ssm_layers"):
                    for layer in self._ssm_layers:
                        d["layer"] = layer
                        values[layer] = self._compose_path(d, pattern=pattern)
                else:
                    values["?"] = self._compose_path(d, pattern=pattern)
            else:
                # Special case concentration
                # Using "?" results in too many sinks and sources according to imod-wq.
                if hasattr(self, "_ssm_layers"):
                    for layer in self._ssm_layers:
                        values[layer] = da.values[()]
                    values = self._compress_values(values)
                else:
                    values["?"] = da.values[()]

        else:
            pattern += "_l{layer}{extension}"
            for layer in np.atleast_1d(da.coords["layer"].values):
                if idf:
                    d["layer"] = layer
                    values[layer] = self._compose_path(d, pattern=pattern)
                else:
                    if "layer" in da.dims:
                        values[layer] = da.sel(layer=layer).values[()]
                    else:
                        values[layer] = da.values[()]

        # Compress the runfile contents using the imod-wq macros
        if "layer" in da.dims:
            if idf and da["layer"].size == nlayer:
                # Compose does not accept non-integers, so use 0, then replace
                d["layer"] = 0
                token_path = imod.util.path.compose(d, pattern=pattern).as_posix()
                token_path = token_path.replace("_l0", "_l$")
                values = {"$": token_path}
            elif idf and compress:
                # Compose does not accept non-integers, so use 0, then replace
                d["layer"] = 0
                range_path = imod.util.path.compose(d, pattern=pattern).as_posix()
                range_path = range_path.replace("_l0", "_l:")
                values = self._compress_idflayers(values, range_path)
            elif compress:
                values = self._compress_values(values)

        return values
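
    # Illustrative note, not part of the original source: for an IDF variable that
    # covers all model layers, the "$" macro is used and the result looks like
    #   {"$": "directory/conductance_l$.idf"}
    # (with "conductance" as a hypothetical variable name), while a scalar defined
    # on layers 1..3 compresses to e.g. {"1:3": 5.0}.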

    def _compress_values(self, values):
        """
        Compress repeated values into fewer lines, aimed at imod-wq macros.
        """
        keys = list(values.keys())
        values = np.array(list(values.values()))
        n = len(values)
        # Try fast method first
        try:
            index_ends = np.argwhere(np.diff(values) != 0)
        except np.core._exceptions.UFuncTypeError:
            # Now try a fully general method
            index_ends = []
            for i in range(n - 1):
                if values[i] != values[i + 1]:
                    index_ends.append(i)
            index_ends = np.array(index_ends)

        index_ends = np.append(index_ends, n - 1)
        index_starts = np.insert(index_ends[:-1] + 1, 0, 0)
        compressed = {}
        for start_index, end_index in zip(index_starts, index_ends):
            s = keys[start_index]
            e = keys[end_index]
            value = values[start_index]
            if s == e:
                compressed[f"{s}"] = value
            else:
                compressed[f"{s}:{e}"] = value
        return compressed
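
    # Illustrative note, not part of the original source: consecutive repeated
    # values collapse into "start:end" keys, e.g.
    #   _compress_values({1: 5.0, 2: 5.0, 3: 5.0, 4: 7.0})
    #   -> {"1:3": 5.0, "4": 7.0}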

    def _compose_values_time(self, varname, globaltimes):
        da = self.dataset[varname]
        values = {}

        if "time" in da.coords:
            package_times = da.coords["time"].values

            starts_ends = forcing_starts_ends(package_times, globaltimes)
            for itime, start_end in enumerate(starts_ends):
                # TODO: this now fails on a non-dim time too
                # solution roughly the same as for layer above?
                values[start_end] = da.isel(time=itime).values[()]
        else:
            values["?"] = da.values[()]

        values = self._compress_values(values)
        return values

    def save(self, directory):
        for name, da in self.dataset.data_vars.items():  # pylint: disable=no-member
            if "y" in da.coords and "x" in da.coords:
                path = pathlib.Path(directory).joinpath(name)
                imod.idf.save(path, da)

    def _check_positive(self, varnames):
        for var in varnames:
            # Take care with nan values
            if (self.dataset[var] < 0).any():
                raise ValueError(f"{var} in {self} must be positive")

    def _check_range(self, varname, lower, upper):
        # TODO: this isn't used anywhere so far.
        warn = False
        msg = ""
        if (self.dataset[varname] < lower).any():
            warn = True
            msg += f"{varname} in {self}: values lower than {lower} detected. "
        if (self.dataset[varname] > upper).any():
            warn = True
            msg += f"{varname} in {self}: values higher than {upper} detected."
        if warn:
            warnings.warn(msg, RuntimeWarning)

    def _check_location_consistent(self, varnames):
        dims = set(self.dataset.dims)
        is_scalar = {}
        for var in varnames:
            scalar = (self.dataset[var].shape == ()) or not any(
                dim in self.dataset[var].dims for dim in ["time", "layer", "y", "x"]
            )
            if not scalar:  # skip scalar value
                dims = dims.intersection(self.dataset[var].dims)
            is_scalar[var] = scalar

        is_nan = True
        for var in varnames:
            if not is_scalar[var]:  # skip scalar values
                # dimensions cannot change for in-place operations
                # reduce to the lowest set of dimensions (e.g. just x and y)
                var_dims = set(self.dataset[var].dims)
                reduce_dims = var_dims.difference(dims)
                # In-place boolean operator
                is_nan &= np.isnan(self.dataset[var]).all(dim=reduce_dims)

        for var in varnames:
            if not is_scalar[var]:  # skip scalar values
                if (np.isnan(self.dataset[var]) ^ is_nan).any():
                    raise ValueError(
                        f"{var} in {self} is not consistent with all variables in: "
                        f"{', '.join(varnames)}. nan values do not line up."
                    )


class BoundaryCondition(Package, abc.ABC):
    """
    Base package for (transient) boundary conditions:
    * recharge
    * general head boundary
    * constant head
    * river
    * drainage
    """

    _template = jinja2.Template(
        " {%- for name, dictname in mapping -%}"
        " {%- for time, timedict in dicts[dictname].items() -%}"
        " {%- for layer, value in timedict.items() %}\n"
        " {{name}}_p{{time}}_s{{system_index}}_l{{layer}} = {{value}}\n"
        " {%- endfor -%}\n"
        " {%- endfor -%}"
        " {%- endfor -%}"
    )
    _ssm_template = jinja2.Template(
        "{%- for species, timedict in concentration.items() -%}"
        " {%- for time, layerdict in timedict.items() -%}"
        " {%- for layer, value in layerdict.items() %}\n"
        " c{{pkg_id}}_t{{species}}_p{{time}}_l{{layer}} = {{value}}\n"
        " {%- endfor -%}"
        " {%- endfor -%}"
        "{%- endfor -%}"
    )
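
    # Illustrative note, not part of the original source: the main template renders
    # one runfile line per variable, stress period, system and layer in the form
    #   {name}_p{time}_s{system_index}_l{layer} = {value}
    # e.g., for a hypothetical second river system,
    #   cond_p1_s2_l$ = directory/conductance_20000101000000_l$.idf
    # and the SSM template renders concentration lines such as
    #   criv_t1_p1_l? = 35.0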

    def add_timemap(self, *args, **kwargs):
        import warnings

        warnings.warn(
            "add_timemap is deprecated: use repeat_stress instead", FutureWarning
        )
        self.repeat_stress(*args, **kwargs)

    def _repeat_stress(self, varname, value, use_cftime):
        if value is not None:
            if varname not in self.dataset.data_vars:
                raise ValueError(
                    f"{varname} does not occur in {self}\n cannot repeat stress"
                )
            if "time" not in self.dataset[varname].coords:
                raise ValueError(
                    f"{varname} in {self}\n does not have dimension time, cannot repeat stress."
                )

            # Replace both key and value by the right datetime type
            d = {
                to_datetime_internal(k, use_cftime): to_datetime_internal(v, use_cftime)
                for k, v in value.items()
            }
            self.dataset[varname].attrs["stress_repeats"] = d
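
    # Illustrative note, not part of the original source: passing e.g.
    #   {"2001-04-01": "2000-04-01"}
    # for a variable with a time coordinate stores a "stress_repeats" attribute
    # mapping the repeated stress period onto the period whose input it reuses,
    # so the corresponding files do not have to be written again.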

    def _compose_values_timelayer(
        self, varname, globaltimes, directory, nlayer, da=None
    ):
        """
        Composes paths to files, or gets the appropriate scalar value for
        a single variable in a dataset.

        Parameters
        ----------
        varname : str
            variable name of the DataArray
        globaltimes : list, np.array
            Holds the global times, i.e. the combined unique times of
            every boundary condition that are used to define the stress
            periods.
            The times of the BoundaryCondition do not have to match all
            the global times. When a globaltime is not present in the
            BoundaryCondition, the value of the first previous available time is
            filled in. The effective result is a forward fill in time.
        directory : str
            Path to working directory, where files will be written.
            Necessary to generate the paths for the runfile.
        da : xr.DataArray, optional
            In some cases fetching the DataArray by varname is not desired.
            It can be passed directly via this optional argument.

        Returns
        -------
        values : dict
            Dictionary containing the {stress period number: {layer number: path
            to file}}. Alternatively: {stress period number: {layer number: scalar
            value}}.
            The stress period number and layer number may be wildcards (e.g. '?').
        """

        values = {}

        if da is None:
            da = self.dataset[varname]

        if "time" in da.coords:
            da_times = da.coords["time"].values
            if "stress_repeats" in da.attrs:
                stress_repeats_keys = np.array(list(da.attrs["stress_repeats"].keys()))
                stress_repeats_values = np.array(
                    list(da.attrs["stress_repeats"].values())
                )
                package_times, inds = np.unique(
                    np.concatenate([da_times, stress_repeats_keys]), return_index=True
                )
                # Times to write in the runfile
                runfile_times = np.concatenate([da_times, stress_repeats_values])[inds]
            else:
                runfile_times = package_times = da_times

            starts_ends = forcing_starts_ends(package_times, globaltimes)

            for time, start_end in zip(runfile_times, starts_ends):
                # Check whether any range occurs in the input.
                # If it does, compress should be False.
                compress = not (":" in start_end)
                values[start_end] = self._compose_values_layer(
                    varname,
                    directory,
                    nlayer=nlayer,
                    time=time,
                    da=da,
                    compress=compress,
                )

        else:
            values["?"] = self._compose_values_layer(
                varname, directory, nlayer=nlayer, da=da
            )

        return values
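
    # Illustrative note, not part of the original source: the result is nested as
    # {stress period: {layer: value or path}}, e.g. for a hypothetical IDF variable
    # "stage" covering all layers:
    #   {"1": {"$": "directory/stage_20000101000000_l$.idf"},
    #    "2:4": {"$": "directory/stage_20000201000000_l$.idf"}}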

    def _max_active_n(self, varname, nlayer, nrow, ncol):
        """
        Determine the maximum number of cells that are active
        during a stress period.

        Parameters
        ----------
        varname : str
            name of the variable to use to calculate the maximum number of
            active cells. Generally conductance.
        shape : tuple of ints
            nlay, nrow, ncol taken from ibound.
        """
        # First compute the active number of cells
        if "time" in self.dataset[varname].coords:
            nmax = int(self.dataset[varname].groupby("time").count(xr.ALL_DIMS).max())
        else:
            nmax = int(self[varname].count())
        if "layer" not in self.dataset.coords:  # Then it applies to every layer
            nmax *= nlayer
        self._cellcount = nmax  # Store cellcount so it can be re-used for ssm.
        self._ssm_cellcount = nmax

        # Second, compute the active number of sinks and sources;
        # overwrite _ssm_cellcount if more specific info is available.
        if "concentration" in self.dataset.data_vars:
            da = self["concentration"]

            if "species" in da.coords:
                nspecies = da.coords["species"].size
            else:
                nspecies = 1

            if "y" not in da.coords and "x" not in da.coords:
                # It's not idf data, but scalar instead
                if "layer" in self.dataset.coords:
                    # Store layers for rendering
                    da_nlayer = self.dataset.coords["layer"].size
                    if da_nlayer == nlayer:
                        # Insert wildcard
                        self._ssm_layers = ["?"]
                    else:
                        self._ssm_layers = self.dataset.coords["layer"].values
                        nlayer = da_nlayer
                # Sinks and sources are applied everywhere,
                # in contrast to other inputs
                nmax = nlayer * nrow * ncol

            self._ssm_cellcount = nmax * nspecies

        return nmax
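
    # Illustrative note, not part of the original source: for a conductance variable
    # with at most 1000 non-null cells per stress period and no "layer" coordinate on
    # the dataset, nmax becomes 1000 * nlayer. If a scalar concentration is present,
    # the sinks-and-sources count is instead nlayer * nrow * ncol (times the number
    # of species), stored on _ssm_cellcount.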

    def _render(self, directory, globaltimes, system_index, nlayer):
        """
        Parameters
        ----------
        directory : str
            Path to working directory, where files will be written.
            Necessary to generate the paths for the runfile.
        globaltimes : list, np.array
            Holds the global times, i.e. the combined unique times of
            every boundary condition that are used to define the stress
            periods.
        system_index : int
            Drainage, River, and GeneralHeadBoundary support multiple systems.
            This is the number that ends up in the runfile for a given
            BoundaryCondition object.
            Note that MT3DMS does not fully support multiple systems, and only
            the first system can act as a source of species.

        Returns
        -------
        rendered : str
            The rendered runfile part for a single boundary condition system.
        """
        mapping = tuple(
            [(k, v) for k, v in self._mapping if v in self.dataset.data_vars]
        )
        d = {"mapping": mapping, "system_index": system_index}
        dicts = {}

        for varname in self.dataset.data_vars.keys():  # pylint: disable=no-member
            if varname == "concentration":
                continue
            dicts[varname] = self._compose_values_timelayer(
                varname, globaltimes, directory, nlayer=nlayer
            )

        d["dicts"] = dicts

        return self._template.render(d)

    def _render_ssm(self, directory, globaltimes, nlayer):
        """
        Parameters
        ----------
        directory : str
            Path to working directory, where files will be written.
            Necessary to generate the paths for the runfile.
        globaltimes : list, np.array
            Holds the global times, i.e. the combined unique times of
            every boundary condition that are used to define the stress
            periods.

        Returns
        -------
        rendered : str
            The rendered runfile SSM part for a single boundary condition system.
        """

        if "concentration" not in self.dataset.data_vars:
            return ""

        d = {"pkg_id": self._pkg_id}
        if "species" in self.dataset["concentration"].coords:
            concentration = {}
            for species in self.dataset["concentration"]["species"].values:
                concentration[species] = self._compose_values_timelayer(
                    varname="concentration",
                    da=self.dataset["concentration"].sel(species=species),
                    globaltimes=globaltimes,
                    directory=directory,
                    nlayer=nlayer,
                )
        else:
            concentration = {
                1: self._compose_values_timelayer(
                    varname="concentration",
                    da=self.dataset["concentration"],
                    globaltimes=globaltimes,
                    directory=directory,
                    nlayer=nlayer,
                )
            }
        d["concentration"] = concentration

        return self._ssm_template.render(d)