Coverage for C:\src\imod-python\imod\mf6\out\disv.py: 91%

231 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-16 11:41 +0200

1import os 

2import struct 

3from typing import Any, BinaryIO, Dict, List, Optional, Tuple, cast 

4 

5import dask 

6import numba 

7import numpy as np 

8import scipy.sparse 

9import xarray as xr 

10import xugrid as xu 

11 

12from imod.mf6.utilities.dataset import assign_datetime_coords 

13 

14from . import cbc 

15from .common import ( 

16 FilePath, 

17 FloatArray, 

18 IntArray, 

19 _to_nan, 

20 get_first_header_advanced_package, 

21) 

22 

23 

def _ugrid_iavert_javert(
    iavert: IntArray, javert: IntArray
) -> Tuple[IntArray, IntArray]:
    """
    Convert MODFLOW 6 face-vertex connectivity to 0-based, non-closing form.

    The node numbers of MODFLOW loop around: the first node of a face is
    repeated as its last node. UGRID conventions do not loop around, so the
    closing node of every face is dropped.

    Parameters
    ----------
    iavert: 1-based index pointer into ``javert``, one entry per face plus one.
    javert: 1-based vertex numbers, including the closing node of each face.

    Returns
    -------
    Tuple of (0-based index pointer, 0-based vertex numbers without the
    closing nodes).
    """
    # Every face loses exactly one node: the closing one.
    nodes_per_face = np.diff(iavert) - 1
    # Rebuilding the pointer from the counts also makes it 0-based.
    indptr = np.concatenate(([0], np.cumsum(nodes_per_face)))
    # The closing node is the last entry before the next face starts:
    # -1 for 1- to 0-based indexing, -1 to step back onto the closing node.
    closing_node = iavert[1:] - 2
    is_kept = np.ones_like(javert, dtype=bool)
    is_kept[closing_node] = False
    return indptr, javert[is_kept] - 1

36 

37 

def read_grb(f: BinaryIO, ntxt: int, lentxt: int) -> Dict[str, Any]:
    """
    Read the binary part of a DISV grid (GRB) file.

    Parameters
    ----------
    f: binary file handle, positioned at the start of the text definitions.
    ntxt: number of text definition lines to skip.
    lentxt: length in bytes of each text definition line.

    Returns
    -------
    dict with the grid, the top, bottom, idomain and icelltype arrays, the
    layer coordinates, the grid sizes and the (1-based) ia and ja
    connectivity arrays.

    Raises
    ------
    ValueError
        If ncells does not equal nlayer * ncells_per_layer.
    """
    # The text lines that follow hold definitions that aim to make the file
    # more portable; none of it is needed here, so skip straight to the
    # binary data.
    f.seek(ntxt * lentxt, 1)

    ncells, nlayer, ncells_per_layer, nvert, njavert, nja = struct.unpack(
        "6i", f.read(24)
    )
    if ncells != (nlayer * ncells_per_layer):
        raise ValueError(f"Invalid file {ncells} {nlayer} {ncells_per_layer}")
    struct.unpack("2d", f.read(16))  # xorigin, yorigin: read but unused
    f.seek(8, 1)  # skip angrot

    layered_shape = (nlayer, ncells_per_layer)
    top_np = np.fromfile(f, np.float64, ncells_per_layer)
    bottom_np = np.fromfile(f, np.float64, ncells).reshape(layered_shape)
    vertices = np.fromfile(f, np.float64, nvert * 2).reshape((nvert, 2))
    np.fromfile(f, np.float64, 2 * ncells_per_layer)  # cellx, celly: unused
    # Python is 0-based; MODFLOW6 is Fortran 1-based
    iavert = np.fromfile(f, np.int32, ncells_per_layer + 1)
    javert = np.fromfile(f, np.int32, njavert)
    ia = np.fromfile(f, np.int32, ncells + 1)
    ja = np.fromfile(f, np.int32, nja)
    idomain_np = np.fromfile(f, np.int32, ncells).reshape(layered_shape)
    icelltype_np = np.fromfile(f, np.int32, ncells).reshape(layered_shape)

    # Build the UGRID topology from the face-vertex connectivity.
    indptr, indices = _ugrid_iavert_javert(iavert, javert)
    face_nodes = scipy.sparse.csr_matrix((indices, indices, indptr))
    grid = xu.Ugrid2d(vertices[:, 0], vertices[:, 1], -1, face_nodes)
    facedim = grid.face_dimension

    coords = {"layer": np.arange(1, nlayer + 1)}
    layered_dims = ("layer", facedim)

    def wrap_layered(values, name: str) -> xu.UgridDataArray:
        # Attach layer coordinates and the grid to a (layer, face) array.
        return xu.UgridDataArray(
            xr.DataArray(values, coords, layered_dims, name=name), grid
        )

    top = xr.DataArray(top_np, dims=[facedim], name="top")
    return {
        "distype": "disv",
        "grid": grid,
        "top": xu.UgridDataArray(top, grid),
        "bottom": wrap_layered(bottom_np, "bottom"),
        "coords": coords,
        "ncells": ncells,
        "nlayer": nlayer,
        "ncells_per_layer": ncells_per_layer,
        "nja": nja,
        "ia": ia,
        "ja": ja,
        "idomain": wrap_layered(idomain_np, "idomain"),
        "icelltype": wrap_layered(icelltype_np, "icelltype"),
    }

101 

102 

def read_times(
    path: FilePath, ntime: int, nlayer: int, ncells_per_layer: int
) -> FloatArray:
    """
    Reads all total simulation times.

    Parameters
    ----------
    path: path of the binary heads file.
    ntime: number of timesteps to read.
    nlayer: number of layers stored per timestep.
    ncells_per_layer: number of float64 values stored per layer.

    Returns
    -------
    1D float64 array with one total simulation time per timestep.
    """
    # Every layer record is a 52-byte header followed by the layer data; the
    # total simulation time sits 16 bytes into the header.
    header = 52
    start_of_header = 16
    data_single_layer = ncells_per_layer * 8
    timestep_stride = nlayer * (header + data_single_layer)

    times = np.empty(ntime, dtype=np.float64)
    with open(path, "rb") as f:
        for itime in range(ntime):
            # Jump to the time stamp of the first layer record of this step.
            f.seek(start_of_header + itime * timestep_stride)
            (times[itime],) = struct.unpack("d", f.read(8))
    return times

129 

130 

def read_hds_timestep(
    path: FilePath, nlayer: int, ncells_per_layer: int, dry_nan: bool, pos: int
) -> FloatArray:
    """
    Reads all values of one timestep.

    Parameters
    ----------
    path: path of the binary heads file.
    nlayer: number of layer records to read.
    ncells_per_layer: number of float64 values per layer record.
    dry_nan: forwarded to ``_to_nan`` together with the values read.
    pos: byte offset in the file at which the timestep starts.

    Returns
    -------
    The result of ``_to_nan`` applied to the (nlayer, ncells_per_layer) array.
    """
    values = np.empty((nlayer, ncells_per_layer), dtype=np.float64)
    with open(path, "rb") as f:
        f.seek(pos)
        for ilayer in range(nlayer):
            f.seek(52, 1)  # skip the 52-byte header of this layer record
            values[ilayer] = np.fromfile(f, np.float64, ncells_per_layer)
    return _to_nan(values, dry_nan)

148 

149 

def open_hds(
    path: FilePath,
    grid_info: Dict[str, Any],
    dry_nan: bool,
    simulation_start_time: Optional[np.datetime64] = None,
    time_unit: Optional[str] = "d",
) -> xu.UgridDataArray:
    """
    Lazily open a binary heads file on a DISV grid.

    One delayed read is created per timestep; nothing but the time stamps is
    read from disk until the data is computed.

    Parameters
    ----------
    path: path of the binary heads file.
    grid_info: dict providing the keys "grid", "nlayer", "ncells_per_layer",
        "coords" and "name". It is not modified.
    dry_nan: forwarded to ``read_hds_timestep``.
    simulation_start_time: if given, the time coordinate is converted with
        ``assign_datetime_coords``.
    time_unit: time unit passed to ``assign_datetime_coords``.

    Returns
    -------
    xu.UgridDataArray with dims ("time", "layer", face_dimension)
    """
    grid = grid_info["grid"]
    nlayer, ncells_per_layer = grid_info["nlayer"], grid_info["ncells_per_layer"]
    # Per layer: a 52-byte header followed by ncells_per_layer float64 values.
    nbytes_per_timestep = nlayer * (52 + ncells_per_layer * 8)
    ntime = os.path.getsize(path) // nbytes_per_timestep
    times = read_times(path, ntime, nlayer, ncells_per_layer)
    # Build a new mapping rather than adding "time" to the caller's dict:
    # the same grid info may be reused for arrays without a time dimension.
    coords = {**grid_info["coords"], "time": times}

    # One delayed array per timestep.
    # TODO verify dimension order
    dask_list = [
        dask.array.from_delayed(
            dask.delayed(read_hds_timestep)(
                path, nlayer, ncells_per_layer, dry_nan, i * nbytes_per_timestep
            ),
            shape=(nlayer, ncells_per_layer),
            dtype=np.float64,
        )
        for i in range(ntime)
    ]

    daskarr = dask.array.stack(dask_list, axis=0)
    da = xr.DataArray(
        daskarr, coords, ("time", "layer", grid.face_dimension), name=grid_info["name"]
    )

    if simulation_start_time is not None:
        da = assign_datetime_coords(da, simulation_start_time, time_unit)
    return xu.UgridDataArray(da, grid)

186 

187 

def open_imeth1_budgets(
    cbc_path: FilePath, grb_content: dict, header_list: List[cbc.Imeth1Header]
) -> xu.UgridDataArray:
    """
    Open the data for an imeth==1 budget section. Data is read lazily per
    timestep.

    Can be used for:

        * STO-SS
        * STO-SY
        * CSUB-CGELASTIC
        * CSUB-WATERCOMP

    Uses the shape information from the GRB file content to reshape the
    budgets into a dense array covering the entire domain (all layers, faces)
    per timestep.

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: dict
    header_list: List[Imeth1Header]

    Returns
    -------
    xu.UgridDataArray with dims ("time", "layer", face_dimension)
    """
    grid = grb_content["grid"]
    budgets = cbc.open_imeth1_budgets(cbc_path, header_list)
    time = budgets["time"]
    dense_shape = (
        time.size,
        grb_content["nlayer"],
        grb_content["ncells_per_layer"],
    )
    # Merge into a new dictionary; grb_content["coords"] is left untouched.
    coords = grb_content["coords"] | {"time": time}

    da = xr.DataArray(
        data=budgets.data.reshape(dense_shape),
        coords=coords,
        dims=("time", "layer", grid.face_dimension),
        name=None,
    )
    return xu.UgridDataArray(da, grid)

231 

232 

def open_imeth6_budgets(
    cbc_path: FilePath,
    grb_content: dict,
    header_list: List[cbc.Imeth6Header],
    return_variable: str = "budget",
    return_id: np.ndarray | None = None,
) -> xu.UgridDataArray:
    """
    Open the data for an imeth==6 budget section.

    Uses the information of the GRB file content to create properly sized
    dense arrays (which store the entire domain). One delayed read is created
    per header, so data is read lazily per timestep.

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: dict
        Provides "nlayer", "ncells_per_layer", "coords" and "grid". It is not
        modified.
    header_list: List[Imeth6Header]
    return_variable: str
        Forwarded to ``cbc.read_imeth6_budgets_dense``.
    return_id: np.ndarray | None
        Forwarded to ``cbc.read_imeth6_budgets_dense``.

    Returns
    -------
    xu.UgridDataArray with dims ("time", "layer", face_dimension)
    """
    # Record layout: two integer ids, the budget, then the auxiliary columns.
    dtype = np.dtype(
        [("id1", np.int32), ("id2", np.int32), ("budget", np.float64)]
        + [(name, np.float64) for name in header_list[0].auxtxt]
    )
    # Dense arrays are allocated for the entire model domain.
    shape = (grb_content["nlayer"], grb_content["ncells_per_layer"])
    size = np.prod(shape)
    dask_list = []
    time = np.empty(len(header_list), dtype=np.float64)
    for i, header in enumerate(header_list):
        time[i] = header.totim
        a = dask.delayed(cbc.read_imeth6_budgets_dense)(
            cbc_path,
            header.nlist,
            dtype,
            header.pos,
            size,
            shape,
            return_variable,
            return_id,
        )
        x = dask.array.from_delayed(a, shape=shape, dtype=np.float64)
        dask_list.append(x)

    daskarr = dask.array.stack(dask_list, axis=0)
    # Merge into a new dictionary instead of adding "time" to
    # grb_content["coords"]: that dict is shared with other readers, some of
    # which build arrays without a time dimension.
    coords = {**grb_content["coords"], "time": time}
    name = header_list[0].text
    grid = grb_content["grid"]
    da = xr.DataArray(
        daskarr, coords, ("time", "layer", grid.face_dimension), name=name
    )
    return xu.UgridDataArray(da, grid)

292 

293 

@numba.njit
def disv_lower_index(
    ia: IntArray,
    ja: IntArray,
    ncells: int,
    nlayer: int,
    ncells_per_layer: int,
) -> IntArray:
    """
    Find, for every cell, the position in ``ja`` of its downward connection.

    Parameters
    ----------
    ia: 1-based index pointer of the cell connectivity.
    ja: 1-based cell numbers of the connected cells.
    ncells: total number of cells.
    nlayer: number of layers.
    ncells_per_layer: number of cells in a single layer.

    Returns
    -------
    Integer array of shape (nlayer, ncells_per_layer) holding 0-based
    positions in ``ja``; -1 where no downward connection was found.
    """
    lower = np.full(ncells, -1, np.int64)
    for i in range(ncells):
        for position in range(ia[i], ia[i + 1]):
            # python is 0-based, modflow6 is 1-based
            nzi = position - 1
            j = ja[nzi] - 1
            d = j - i
            # d < ncells_per_layer is an upper, diagonal or horizontal
            # connection: nothing to store.
            if d == ncells_per_layer:  # lower neighbor
                lower[i] = nzi
            elif d > ncells_per_layer:  # skips one: must be pass through
                npassed = int(d / ncells_per_layer)
                # Cell i and the cells stacked below it, npassed entries in
                # total, all point at this same connection.
                for ipass in range(0, npassed):
                    lower[i + ipass * ncells_per_layer] = nzi

    return lower.reshape(nlayer, ncells_per_layer)

318 

319 

def expand_indptr(ia: IntArray):
    """
    Expand a CSR-style index pointer into one row number per stored entry.

    Row ``k`` is repeated ``ia[k + 1] - ia[k]`` times, so the result does not
    depend on whether ``ia`` is 0-based or 1-based.
    """
    entries_per_row = np.diff(ia)
    row_numbers = np.arange(ia.size - 1)
    return np.repeat(row_numbers, entries_per_row)

323 

324 

def disv_horizontal_index(
    ia: IntArray,
    ja: IntArray,
    nlayer: int,
    ncells_per_layer: int,
    edge_face_connectivity: IntArray,
    fill_value: int,
    face_coordinates: FloatArray,
):
    """
    Map grid edges to positions in ``ja`` and compute edge unit vectors.

    Parameters
    ----------
    ia: 1-based index pointer of the cell connectivity.
    ja: 1-based cell numbers of the connected cells.
    nlayer: number of layers.
    ncells_per_layer: number of cells in a single layer.
    edge_face_connectivity: array with two face numbers per edge; the second
        column equals ``fill_value`` for edges that do not connect two faces.
    fill_value: marker for a missing face in ``edge_face_connectivity``.
    face_coordinates: array with the x and y coordinate of every face.

    Returns
    -------
    horizontal: (nlayer, nedge) integer array with the position in ``ja`` of
        the connection across every edge, -1 where the edge connects nothing.
    u, v: (nedge,) float arrays with the x and y component of the unit vector
        between the two faces of every edge, NaN where the edge connects
        nothing.
    """
    nedge = len(edge_face_connectivity)

    # Positions of the horizontal connections: the connected cell has a
    # higher number, but lies within the same layer.
    row = expand_indptr(ia)
    column = ja - 1
    offset = column - row
    is_horizontal = (0 < offset) & (offset < ncells_per_layer)
    index = np.flatnonzero(is_horizontal).reshape((nlayer, -1))

    # i -> j is pre-sorted (required by CSR structure) and the edge faces are
    # repeated per layer. Because i -> j is sorted in terms of face numbering,
    # only the order of the edge_face_connectivity has to be figured out.
    is_connection = edge_face_connectivity[:, 1] != fill_value
    edge_faces = edge_face_connectivity[is_connection]
    sorter = np.lexsort((edge_faces[:, 1], edge_faces[:, 0]))
    order = np.argsort(sorter)

    # Reshuffle every layer and store; unconnected edges remain -1.
    horizontal = np.full((nlayer, nedge), -1)
    horizontal[:, is_connection] = index[:, order]

    # Unit components (x: u, y: v), pointing from the lower to the higher
    # face number of every edge.
    xy = face_coordinates[np.sort(edge_faces, axis=1)]
    dx = xy[:, 1, 0] - xy[:, 0, 0]
    dy = xy[:, 1, 1] - xy[:, 0, 1]
    length = np.sqrt(dx**2 + dy**2)
    u = np.full(nedge, np.nan)
    v = np.full(nedge, np.nan)
    u[is_connection] = dx / length
    v[is_connection] = dy / length
    return horizontal, u, v

368 

369 

def disv_to_horizontal_lower_indices(
    grb_content: dict,
) -> Tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]:
    """
    Derive the indices of horizontal and downward connections from GRB content.

    Parameters
    ----------
    grb_content: dict providing "grid", "ia", "ja", "ncells", "nlayer",
        "ncells_per_layer" and "coords".

    Returns
    -------
    Tuple of four xr.DataArrays: the horizontal index with dims
    ("layer", edge_dimension), the u and v unit components with dims
    (edge_dimension,), and the lower index with dims
    ("layer", face_dimension).
    """
    grid = grb_content["grid"]
    ia, ja = grb_content["ia"], grb_content["ja"]
    nlayer = grb_content["nlayer"]
    ncells_per_layer = grb_content["ncells_per_layer"]

    horizontal, u, v = disv_horizontal_index(
        ia=ia,
        ja=ja,
        nlayer=nlayer,
        ncells_per_layer=ncells_per_layer,
        edge_face_connectivity=grid.edge_face_connectivity,
        fill_value=grid.fill_value,
        face_coordinates=grid.face_coordinates,
    )
    lower = disv_lower_index(
        ia=ia,
        ja=ja,
        ncells=grb_content["ncells"],
        nlayer=nlayer,
        ncells_per_layer=ncells_per_layer,
    )

    edge_dims = [grid.edge_dimension]
    horizontal_da = xr.DataArray(
        horizontal, grb_content["coords"], dims=["layer", grid.edge_dimension]
    )
    u_da = xr.DataArray(u, dims=edge_dims)
    v_da = xr.DataArray(v, dims=edge_dims)
    lower_da = xr.DataArray(
        lower, grb_content["coords"], dims=["layer", grid.face_dimension]
    )
    return horizontal_da, u_da, v_da, lower_da

401 

402 

def disv_extract_lower_budget(
    budgets: xr.DataArray, index: xr.DataArray
) -> xr.DataArray:
    """
    Grab the flows over the lower face from the flow-ja-face array.

    Parameters
    ----------
    budgets: xr.DataArray of floats
        flow-ja-face array, indexed along "linear_index" and carrying a
        "time" coordinate.
    index: xr.DataArray of ints
        index array with dims ("layer", face_dimension); negative entries
        mark cells without a value.

    Returns
    -------
    xr.DataArray of floats with dims ("time", "layer", face_dimension);
    0.0 wherever the index is negative.
    """
    face_dimension = index.dims[-1]
    ntime = budgets["time"].size
    coords = dict(index.coords)
    coords.update(time=budgets["time"])
    # isel with a 3D array is extremely slow; selecting with the flattened
    # index followed by the dask reshape is much faster for some reason.
    flat_index = index.values.ravel()
    selected = budgets.isel(linear_index=flat_index).data
    da = xr.DataArray(
        data=selected.reshape((ntime, *index.shape)),
        coords=coords,
        dims=("time", "layer", face_dimension),
        name="flow-ja-face",
    )
    return da.where(index >= 0, other=0.0)

419 

420 

def disv_extract_horizontal_budget(
    budgets: xr.DataArray, index: xr.DataArray
) -> xr.DataArray:
    """
    Grab horizontal flows from the flow-ja-face array.

    This could be done by a single .isel() indexing operation, but those
    are extremely slow in this case, which seems to be an xarray issue.

    Parameters
    ----------
    budgets: xr.DataArray of floats
        flow-ja-face array, dims ("time", "linear_index")
        The linear index enumerates cell-to-cell connections in this case, not
        the individual cells.
    index: xr.DataArray of ints
        index array with dims ("layer", edge_dimension); negative entries
        mark edges without a value.

    Returns
    -------
    xr.DataArray of floats with dims ("time", "layer", edge_dimension);
    0.0 wherever the index is negative.
    """
    edge_dimension = index.dims[-1]
    ntime = budgets["time"].size
    coords = dict(index.coords)
    coords.update(time=budgets["time"])
    # Select with the flattened index, then let dask reshape: much faster
    # than isel with a multi-dimensional indexer.
    flat_index = index.values.ravel()
    selected = budgets.isel(linear_index=flat_index).data
    da = xr.DataArray(
        data=selected.reshape((ntime, *index.shape)),
        coords=coords,
        dims=("time", "layer", edge_dimension),
        name="flow-ja-face",
    )
    return da.where(index >= 0, other=0.0)

456 

457 

def disv_open_face_budgets(
    cbc_path: FilePath, grb_content: dict, header_list: List[cbc.Imeth1Header]
) -> Tuple[xu.UgridDataArray, xu.UgridDataArray, xu.UgridDataArray, xu.UgridDataArray]:
    """
    Open the flow-ja-face budgets as horizontal and lower face flows.

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: dict
    header_list: List[Imeth1Header]

    Returns
    -------
    Tuple of four xu.UgridDataArrays: the flow over the horizontal faces, its
    x component, its y component, and the flow over the lower faces.
    """
    horizontal_index, u, v, lower_index = disv_to_horizontal_lower_indices(grb_content)
    budgets = cbc.open_imeth1_budgets(cbc_path, header_list)
    horizontal = disv_extract_horizontal_budget(budgets, horizontal_index)
    # Use the dedicated extractor for the face-indexed lower flows.
    lower = disv_extract_lower_budget(budgets, lower_index)
    # The components are the negated flow times the edge unit vector.
    flow_x = -horizontal * u
    flow_y = -horizontal * v
    grid = grb_content["grid"]
    return (
        xu.UgridDataArray(horizontal, grid),
        xu.UgridDataArray(flow_x, grid),
        xu.UgridDataArray(flow_y, grid),
        xu.UgridDataArray(lower, grid),
    )

474 

475 

def open_cbc(
    cbc_path: FilePath,
    grb_content: Dict[str, Any],
    flowja: bool = False,
    simulation_start_time: Optional[np.datetime64] = None,
    time_unit: Optional[str] = "d",
) -> Dict[str, xu.UgridDataArray]:
    """
    Open all budget terms of a cell-by-cell budget file on a DISV grid.

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: dict
    flowja: bool
        If True, the flow-ja-face term is returned as "flow-ja-face" plus
        "connectivity"; otherwise it is split into "flow-horizontal-face",
        "flow-horizontal-face-x", "flow-horizontal-face-y" and
        "flow-lower-face".
    simulation_start_time: if given, every entry is passed through
        ``assign_datetime_coords``.
    time_unit: time unit passed to ``assign_datetime_coords``.

    Returns
    -------
    dict mapping budget names to the opened arrays.
    """
    headers = cbc.read_cbc_headers(cbc_path)

    indices = None
    advanced_header = get_first_header_advanced_package(headers)
    if advanced_header is not None:
        # For advanced packages the id2 column of variable gwf contains the
        # MF6 id's. Get id's eager from first stress period.
        record_fields = [
            ("id1", np.int32),
            ("id2", np.int32),
            ("budget", np.float64),
        ]
        record_fields += [(name, np.float64) for name in advanced_header.auxtxt]
        table = cbc.read_imeth6_budgets(
            cbc_path,
            advanced_header.nlist,
            np.dtype(record_fields),
            advanced_header.pos,
        )
        indices = table["id2"] - 1  # Convert to 0 based index

    cbc_content = {}
    for key, header_list in headers.items():
        first = header_list[0]
        if isinstance(first, cbc.Imeth1Header):
            assert all(isinstance(x, cbc.Imeth1Header) for x in header_list)
            imeth1_headers = cast(List[cbc.Imeth1Header], header_list)
            if key != "flow-ja-face":
                cbc_content[key] = open_imeth1_budgets(
                    cbc_path, grb_content, imeth1_headers
                )
            elif flowja:
                flowjaface, ij = cbc.open_face_budgets_as_flowja(
                    cbc_path, imeth1_headers, grb_content
                )
                cbc_content["flow-ja-face"] = flowjaface
                cbc_content["connectivity"] = ij
            else:
                flow_xy, flow_x, flow_y, lower = disv_open_face_budgets(
                    cbc_path, grb_content, imeth1_headers
                )
                cbc_content["flow-horizontal-face"] = flow_xy
                cbc_content["flow-horizontal-face-x"] = flow_x
                cbc_content["flow-horizontal-face-y"] = flow_y
                cbc_content["flow-lower-face"] = lower
        elif isinstance(first, cbc.Imeth6Header):
            assert all(isinstance(x, cbc.Imeth6Header) for x in header_list)
            imeth6_headers = cast(List[cbc.Imeth6Header], header_list)
            if not first.text.startswith("data-"):
                cbc_content[key] = open_imeth6_budgets(
                    cbc_path,
                    grb_content,
                    imeth6_headers,
                    return_id=indices,
                )
                continue
            # for non cell flow budget terms, use auxiliary variables as
            # return value
            for return_variable in first.auxtxt:
                key_aux = first.txt2id1 + "-" + return_variable
                cbc_content[key_aux] = open_imeth6_budgets(
                    cbc_path,
                    grb_content,
                    imeth6_headers,
                    return_variable,
                    return_id=indices,
                )

    if simulation_start_time is not None:
        # Only existing keys are reassigned, so iterating the dict is safe.
        for cbc_name in cbc_content:
            cbc_content[cbc_name] = assign_datetime_coords(
                cbc_content[cbc_name], simulation_start_time, time_unit
            )
    return cbc_content

549 

550 

def grid_info(like: xu.UgridDataArray) -> Dict[str, Any]:
    """
    Collect the grid information of a UgridDataArray in a dict.

    Parameters
    ----------
    like: xu.UgridDataArray with a "layer" coordinate and a face dimension.

    Returns
    -------
    dict with the keys "name", "grid", "nlayer", "ncells_per_layer" and
    "coords" (the layer and face coordinates of ``like``).
    """
    grid = like.ugrid.grid
    facedim = grid.face_dimension
    return {
        "name": "head",
        # open_hds looks up the grid under this key.
        "grid": grid,
        "nlayer": like["layer"].size,
        "ncells_per_layer": like[facedim].size,
        "coords": {
            "layer": like["layer"],
            facedim: like[facedim],
        },
    }