Coverage for C:\src\imod-python\imod\mf6\out\dis.py: 97%

180 statements  

Generated by coverage.py v7.5.1 at 2024-05-16 11:41 +0200

1import os 

2import struct 

3from typing import Any, BinaryIO, Dict, List, Optional, Tuple, cast 

4 

5import dask 

6import numba 

7import numpy as np 

8import xarray as xr 

9 

10import imod 

11from imod.mf6.utilities.dataset import assign_datetime_coords 

12 

13from . import cbc 

14from .common import ( 

15 FilePath, 

16 FloatArray, 

17 IntArray, 

18 _to_nan, 

19 get_first_header_advanced_package, 

20) 

21 

22 

# Binary Grid File / DIS Grids
# https://water.usgs.gov/water-resources/software/MODFLOW-6/mf6io_6.0.4.pdf#page=162
def read_grb(f: BinaryIO, ntxt: int, lentxt: int) -> Dict[str, Any]:
    """
    Read a MODFLOW 6 binary grid (GRB) file of a structured DIS grid.

    Parameters
    ----------
    f: BinaryIO
        Open binary file handle, presumably positioned just after the GRB
        header lines that provided ntxt and lentxt — TODO confirm with caller.
    ntxt: int
        Number of text definition lines to skip.
    lentxt: int
        Byte length of each text definition line.

    Returns
    -------
    Dict[str, Any]
        Grid shape (nlayer, nrow, ncol, ncells), coordinates, top and bottom
        elevations, idomain, icelltype, and the CSR connectivity (ia, ja, nja).
    """
    # we don't need any information from the text lines that follow,
    # they are definitions that aim to make the file more portable,
    # so let's skip straight to the binary data
    f.seek(ntxt * lentxt, 1)

    # Grid dimensions and number of connections, stored as little-endian int32.
    ncells = struct.unpack("i", f.read(4))[0]
    nlayer = struct.unpack("i", f.read(4))[0]
    nrow = struct.unpack("i", f.read(4))[0]
    ncol = struct.unpack("i", f.read(4))[0]
    nja = struct.unpack("i", f.read(4))[0]
    # A DIS grid is dense: the cell count must equal the product of the dims.
    if ncells != (nlayer * nrow * ncol):
        raise ValueError(f"Invalid file {ncells} {nlayer} {nrow} {ncol}")
    xorigin = struct.unpack("d", f.read(8))[0]
    yorigin = struct.unpack("d", f.read(8))[0]
    f.seek(8, 1)  # skip angrot
    # Cell sizes along x (delr, per column) and y (delc, per row).
    delr = np.fromfile(f, np.float64, ncol)
    delc = np.fromfile(f, np.float64, nrow)
    top_np = np.reshape(np.fromfile(f, np.float64, nrow * ncol), (nrow, ncol))
    bottom_np = np.reshape(np.fromfile(f, np.float64, ncells), (nlayer, nrow, ncol))
    # CSR connectivity: ia are row offsets, ja the (1-based) connected cells.
    ia = np.fromfile(f, np.int32, ncells + 1)
    ja = np.fromfile(f, np.int32, nja)
    idomain_np = np.reshape(np.fromfile(f, np.int32, ncells), (nlayer, nrow, ncol))
    icelltype_np = np.reshape(np.fromfile(f, np.int32, ncells), (nlayer, nrow, ncol))

    # Build xarray coordinates; -delc because y decreases with increasing row.
    bounds = (xorigin, xorigin + delr.sum(), yorigin, yorigin + delc.sum())
    coords = imod.util.spatial._xycoords(bounds, (delr, -delc))
    top = xr.DataArray(top_np, coords, ("y", "x"), name="top")
    coords["layer"] = np.arange(1, nlayer + 1)
    dims = ("layer", "y", "x")
    bottom = xr.DataArray(bottom_np, coords, dims, name="bottom")
    idomain = xr.DataArray(idomain_np, coords, dims, name="idomain")
    icelltype = xr.DataArray(icelltype_np, coords, dims, name="icelltype")

    return {
        "distype": "dis",
        "top": top,
        "bottom": bottom,
        "coords": coords,
        "ncells": ncells,
        "nlayer": nlayer,
        "nrow": nrow,
        "ncol": ncol,
        "nja": nja,
        "ia": ia,
        "ja": ja,
        "idomain": idomain,
        "icelltype": icelltype,
    }

74 

75 

def read_times(
    path: FilePath, ntime: int, nlayer: int, nrow: int, ncol: int
) -> FloatArray:
    """
    Read the total simulation time (totim) of every timestep in a head file.
    """
    # Byte layout of one layer record: 16 bytes (kstp, kper, pertime) precede
    # totim, 28 more header bytes follow it, then nrow * ncol doubles of data.
    start_of_header = 16
    rest_of_header = 28
    data_single_layer = nrow * ncol * 8
    header = 52
    # Distance from just past one totim to the start of the next one, i.e.
    # the remainder of this record plus all remaining layers of the timestep.
    nskip = (
        rest_of_header
        + data_single_layer
        + (nlayer - 1) * (header + data_single_layer)
        + start_of_header
    )

    totim_struct = struct.Struct("d")
    collected = []
    with open(path, "rb") as f:
        f.seek(start_of_header)
        for _ in range(ntime):
            # total simulation time
            collected.append(totim_struct.unpack(f.read(8))[0])
            f.seek(nskip, 1)
    return np.array(collected, dtype=np.float64)

102 

103 

def read_hds_timestep(
    path: FilePath, nlayer: int, nrow: int, ncol: int, dry_nan: bool, pos: int
) -> FloatArray:
    """
    Read the head values of a single timestep, layer by layer, and return
    them as a (nlayer, nrow, ncol) array with dry/inactive values handled
    by ``_to_nan``.
    """
    per_layer = nrow * ncol
    out = np.empty((nlayer, nrow, ncol), dtype=np.float64)
    with open(path, "rb") as f:
        f.seek(pos)
        for layer in out:
            # Skip the 52-byte record header preceding each layer
            # (presumably kstp, kper, pertime, totim, text, ncol, nrow, ilay).
            f.seek(52, 1)
            layer[:] = np.fromfile(f, np.float64, per_layer).reshape(nrow, ncol)
    return _to_nan(out, dry_nan)

122 

123 

def open_hds(
    path: FilePath,
    grid_info: Dict[str, Any],
    dry_nan: bool,
    simulation_start_time: Optional[np.datetime64] = None,
    time_unit: Optional[str] = "d",
) -> xr.DataArray:
    """
    Lazily open a MODFLOW 6 head file as a DataArray with dims
    ("time", "layer", "y", "x").

    Parameters
    ----------
    path: str, pathlib.Path
    grid_info: Dict[str, Any]
        Must contain "nlayer", "nrow", "ncol", "coords" and a "name" entry
        (note: "name" is not produced by read_grb itself; the caller adds it).
    dry_nan: bool
        Forwarded to read_hds_timestep to convert dry cells to NaN.
    simulation_start_time: np.datetime64, optional
        When given, the float times are converted to datetimes.
    time_unit: str, optional
        Time unit used for the datetime conversion; defaults to "d".

    Returns
    -------
    xr.DataArray of lazily loaded heads.
    """
    nlayer, nrow, ncol = grid_info["nlayer"], grid_info["nrow"], grid_info["ncol"]
    # One timestep stores, per layer, a 52-byte header plus nrow * ncol doubles.
    timestep_nbytes = nlayer * (52 + nrow * ncol * 8)
    filesize = os.path.getsize(path)
    ntime = filesize // timestep_nbytes
    times = read_times(path, ntime, nlayer, nrow, ncol)
    # Copy so the caller's coords dictionary is not mutated by adding "time".
    coords = dict(grid_info["coords"])
    coords["time"] = times

    # Build one delayed (nlayer, nrow, ncol) array per timestep.
    dask_list = []
    for i in range(ntime):
        pos = i * timestep_nbytes
        a = dask.delayed(read_hds_timestep)(path, nlayer, nrow, ncol, dry_nan, pos)
        x = dask.array.from_delayed(a, shape=(nlayer, nrow, ncol), dtype=np.float64)
        dask_list.append(x)

    daskarr = dask.array.stack(dask_list, axis=0)
    data_array = xr.DataArray(
        daskarr, coords, ("time", "layer", "y", "x"), name=grid_info["name"]
    )
    if simulation_start_time is not None:
        data_array = assign_datetime_coords(
            data_array, simulation_start_time, time_unit
        )
    return data_array

156 

157 

def open_imeth1_budgets(
    cbc_path: FilePath, grb_content: dict, header_list: List[cbc.Imeth1Header]
) -> xr.DataArray:
    """
    Open the data for an imeth==1 budget section. Data is read lazily per
    timestep.

    Can be used for:

    * STO-SS
    * STO-SY
    * CSUB-CGELASTIC
    * CSUB-WATERCOMP

    Utilizes the shape information from the DIS GRB file to create a dense
    array; (lazily) allocates for the entire domain (all layers, rows, columns)
    per timestep.

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: dict
    header_list: List[Imeth1Header]

    Returns
    -------
    xr.DataArray with dims ("time", "layer", "y", "x")
    """
    budgets = cbc.open_imeth1_budgets(cbc_path, header_list)
    dense_shape = (
        budgets["time"].size,
        grb_content["nlayer"],
        grb_content["nrow"],
        grb_content["ncol"],
    )
    # The dict union produces a new mapping; grb_content["coords"] is untouched.
    coords = grb_content["coords"] | {"time": budgets["time"]}

    return xr.DataArray(
        data=budgets.data.reshape(dense_shape),
        coords=coords,
        dims=("time", "layer", "y", "x"),
        name=budgets.name,
    )

199 

200 

def open_imeth6_budgets(
    cbc_path: FilePath,
    grb_content: dict,
    header_list: List[cbc.Imeth6Header],
    return_variable: str = "budget",
    indices: np.ndarray | None = None,
) -> xr.DataArray:
    """
    Open the data for an imeth==6 budget section. Data is read lazily per
    timestep.

    Uses the information of the DIS GRB file to create the properly sized dense
    xr.DataArrays (which store the entire domain). Doing so ignores the boundary
    condition internal index (id2) and any present auxiliary columns.

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: dict
    header_list: List[Imeth6Header]
    return_variable: str
        Name of the record column to return; defaults to "budget".
    indices: np.ndarray | None
        Optional 0-based cell indices at which to place the values; used for
        advanced packages. NOTE(review): when None, cbc.read_imeth6_budgets_dense
        presumably derives placement from the records themselves — verify there.

    Returns
    -------
    xr.DataArray with dims ("time", "layer", "y", "x")
    """
    # One imeth==6 record: two int32 ids, the budget value, plus any auxiliary
    # float columns announced in the first header.
    dtype = np.dtype(
        [("id1", np.int32), ("id2", np.int32), ("budget", np.float64)]
        + [(name, np.float64) for name in header_list[0].auxtxt]
    )
    shape = (grb_content["nlayer"], grb_content["nrow"], grb_content["ncol"])
    size = np.prod(shape)
    # Allocates dense (lazy) arrays for the entire model domain, per timestep.
    dask_list = []
    time = np.empty(len(header_list), dtype=np.float64)
    for i, header in enumerate(header_list):
        time[i] = header.totim
        a = dask.delayed(cbc.read_imeth6_budgets_dense)(
            cbc_path,
            header.nlist,
            dtype,
            header.pos,
            size,
            shape,
            return_variable,
            indices,
        )
        x = dask.array.from_delayed(a, shape=shape, dtype=np.float64)
        dask_list.append(x)

    daskarr = dask.array.stack(dask_list, axis=0)
    # Copy so the caller's coords dictionary is not mutated by adding "time".
    coords = dict(grb_content["coords"])
    coords["time"] = time
    name = header_list[0].text
    return xr.DataArray(daskarr, coords, ("time", "layer", "y", "x"), name=name)

256 

257 

@numba.njit
def dis_indices(
    ia: IntArray,
    ja: IntArray,
    ncells: int,
    nlayer: int,
    nrow: int,
    ncol: int,
) -> Tuple[IntArray, IntArray, IntArray]:
    """
    Infer type of connection via cell number comparison. Returns arrays that can
    be used for extracting right, front, and lower face flow from the
    flow-ja-face array.

    In a structured grid, using a linear index:
    * the right neighbor is +(1)
    * the front neighbor is +(number of cells in a column)
    * the lower neighbor is +(number of cells in a layer)
    * lower "pass-through" cells (idomain == -1) are a multiple of (number of
      cells in a layer)

    Parameters
    ----------
    ia: Array of ints
        Row index of Compressed Sparse Row (CSR) connectivity matrix.
    ja: Array of ints
        Column index of CSR connectivity matrix. Every entry represents a
        cell-to-cell connection.
    ncells: int
    nlayer: int
    nrow: int
    ncol: int

    Returns
    -------
    right: 3D array of ints
    front: 3D array of ints
    lower: 3D array of ints
    """
    shape = (nlayer, nrow, ncol)
    ncells_per_layer = nrow * ncol
    # -1 marks "no neighbor in this direction" for a cell.
    right = np.full(ncells, -1, np.int64)
    front = np.full(ncells, -1, np.int64)
    lower = np.full(ncells, -1, np.int64)

    for i in range(ncells):
        # Walk the CSR entries (connections) of cell i.
        for nzi in range(ia[i], ia[i + 1]):
            nzi -= 1  # python is 0-based, modflow6 is 1-based
            j = ja[nzi] - 1  # python is 0-based, modflow6 is 1-based
            # Positive offsets point right/front/lower; non-positive ones are
            # the mirrored connections and can be skipped.
            d = j - i
            if d <= 0:  # left, back, upper
                continue
            elif d == 1:  # right neighbor
                right[i] = nzi
            elif d == ncol:  # front neighbor
                front[i] = nzi
            elif d == ncells_per_layer:  # lower neighbor
                lower[i] = nzi
            else:  # skips one: must be pass through
                # The connection jumps npassed layers; assign the same CSR
                # entry to the lower face of every skipped cell in the column.
                npassed = int(d / ncells_per_layer)
                for ipass in range(0, npassed):
                    lower[i + ipass * ncells_per_layer] = nzi

    return right.reshape(shape), front.reshape(shape), lower.reshape(shape)

322 

323 

def dis_to_right_front_lower_indices(
    grb_content: dict,
) -> Tuple[xr.DataArray, xr.DataArray, xr.DataArray]:
    """
    Infer the indices to extract right, front, and lower face flows from the
    flow-ja-face array.

    Parameters
    ----------
    grb_content: dict

    Returns
    -------
    right: xr.DataArray of ints with dims ("layer", "y", "x")
    front: xr.DataArray of ints with dims ("layer", "y", "x")
    lower: xr.DataArray of ints with dims ("layer", "y", "x")
    """
    arrays = dis_indices(
        ia=grb_content["ia"],
        ja=grb_content["ja"],
        ncells=grb_content["ncells"],
        nlayer=grb_content["nlayer"],
        nrow=grb_content["nrow"],
        ncol=grb_content["ncol"],
    )
    coords = grb_content["coords"]
    dims = ("layer", "y", "x")
    right, front, lower = (xr.DataArray(arr, coords, dims) for arr in arrays)
    return right, front, lower

354 

355 

def dis_extract_face_budgets(
    budgets: xr.DataArray, index: xr.DataArray
) -> xr.DataArray:
    """
    Grab right, front, or lower face flows from the flow-ja-face array.

    This could be done by a single .isel() indexing operation, but those
    are extremely slow in this case, which seems to be an xarray issue.

    Parameters
    ----------
    budgets: xr.DataArray of floats
        flow-ja-face array, dims ("time", "linear_index")
        The linear index enumerates cell-to-cell connections in this case, not
        the individual cells.
    index: xr.DataArray of ints
        right, front, or lower index array with dims("layer", "y", "x")

    Returns
    -------
    xr.DataArray of floats with dims ("time", "layer", "y", "x")
    """
    ntime = budgets["time"].size
    coords = dict(index.coords)
    coords["time"] = budgets["time"]
    # Selecting with a flat index and reshaping afterwards (via dask) is much
    # faster than a 3D .isel() for some reason.
    flat_index = index.values.ravel()
    selected = budgets.isel(linear_index=flat_index).data
    result = xr.DataArray(
        data=selected.reshape((ntime, *index.shape)),
        coords=coords,
        dims=("time", "layer", "y", "x"),
        name="flow-ja-face",
    )
    # Cells without a neighbor in this direction carry index -1: zero them.
    return result.where(index >= 0, other=0.0)

390 

391 

def dis_open_face_budgets(
    cbc_path: FilePath, grb_content: dict, header_list: List[cbc.Imeth1Header]
) -> Tuple[xr.DataArray, xr.DataArray, xr.DataArray]:
    """
    Open the flow-ja-face, and extract right, front, and lower face flows.

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: dict
    header_list: List[Imeth1Header]

    Returns
    -------
    right: xr.DataArray of floats with dims ("time", "layer", "y", "x")
    front: xr.DataArray of floats with dims ("time", "layer", "y", "x")
    lower: xr.DataArray of floats with dims ("time", "layer", "y", "x")
    """
    face_indices = dis_to_right_front_lower_indices(grb_content)
    budgets = cbc.open_imeth1_budgets(cbc_path, header_list)
    right, front, lower = (
        dis_extract_face_budgets(budgets, index) for index in face_indices
    )
    return right, front, lower

418 

419 

# TODO: Currently assumes dis grb, can be checked & dispatched
def _read_advanced_package_indices(
    cbc_path: FilePath, headers: Dict[str, List[Any]]
) -> Optional[np.ndarray]:
    """
    Return the 0-based cell indices stored in the id2 column of the first
    advanced-package (imeth==6) section, or None when the CBC file contains
    no advanced package.
    """
    header = get_first_header_advanced_package(headers)
    if header is None:
        return None
    # For advanced packages the id2 column of variable gwf contains the MF6 id's.
    # Get id's eager from first stress period.
    dtype = np.dtype(
        [("id1", np.int32), ("id2", np.int32), ("budget", np.float64)]
        + [(name, np.float64) for name in header.auxtxt]
    )
    table = cbc.read_imeth6_budgets(cbc_path, header.nlist, dtype, header.pos)
    return table["id2"] - 1  # Convert to 0 based index


def open_cbc(
    cbc_path: FilePath,
    grb_content: Dict[str, Any],
    flowja: bool = False,
    simulation_start_time: Optional[np.datetime64] = None,
    time_unit: Optional[str] = "d",
) -> Dict[str, xr.DataArray]:
    """
    Open a MODFLOW 6 cell-by-cell (CBC) budget file using DIS grid information.

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: Dict[str, Any]
        Content of the associated GRB file, as produced by read_grb.
    flowja: bool
        When True, return the flow-ja-face array as-is plus its connectivity;
        when False, split it into right/front/lower face flows.
    simulation_start_time: np.datetime64, optional
        When given, float times on every array are converted to datetimes.
    time_unit: str, optional
        Time unit used for the datetime conversion; defaults to "d".

    Returns
    -------
    Dict[str, xr.DataArray]
        One lazily-loaded array per budget term.
    """
    headers = cbc.read_cbc_headers(cbc_path)
    indices = _read_advanced_package_indices(cbc_path, headers)
    cbc_content = {}
    for key, header_list in headers.items():
        # TODO: validate homogeneity of header_list, ndat consistent, nlist consistent etc.
        if key == "flow-ja-face" and isinstance(header_list[0], cbc.Imeth1Header):
            assert all(isinstance(x, cbc.Imeth1Header) for x in header_list)
            if flowja:
                flowjaface, nm = cbc.open_face_budgets_as_flowja(
                    cbc_path, cast(List[cbc.Imeth1Header], header_list), grb_content
                )
                cbc_content["flow-ja-face"] = flowjaface
                cbc_content["connectivity"] = nm
            else:
                right, front, lower = dis_open_face_budgets(
                    cbc_path, grb_content, cast(List[cbc.Imeth1Header], header_list)
                )
                cbc_content["flow-right-face"] = right
                cbc_content["flow-front-face"] = front
                cbc_content["flow-lower-face"] = lower
        elif isinstance(header_list[0], cbc.Imeth1Header):
            assert all(isinstance(x, cbc.Imeth1Header) for x in header_list)
            cbc_content[key] = open_imeth1_budgets(
                cbc_path, grb_content, cast(List[cbc.Imeth1Header], header_list)
            )
        elif isinstance(header_list[0], cbc.Imeth6Header):
            assert all(isinstance(x, cbc.Imeth6Header) for x in header_list)
            # for non cell flow budget terms, use auxiliary variables as return value
            if header_list[0].text.startswith("data-"):
                for return_variable in header_list[0].auxtxt:
                    key_aux = header_list[0].txt2id1 + "-" + return_variable
                    cbc_content[key_aux] = open_imeth6_budgets(
                        cbc_path,
                        grb_content,
                        cast(List[cbc.Imeth6Header], header_list),
                        return_variable,
                        indices=indices,
                    )
            else:
                cbc_content[key] = open_imeth6_budgets(
                    cbc_path,
                    grb_content,
                    cast(List[cbc.Imeth6Header], header_list),
                    indices=indices,
                )
    if simulation_start_time is not None:
        for cbc_name, cbc_array in cbc_content.items():
            cbc_content[cbc_name] = assign_datetime_coords(
                cbc_array, simulation_start_time, time_unit
            )

    return cbc_content

494 

495 

def grid_info(like: xr.DataArray) -> Dict[str, Any]:
    """
    Derive DIS grid shape and coordinate information from a template array
    carrying "layer", "y" and "x" coordinates.
    """
    layer = like["layer"]
    y = like["y"]
    x = like["x"]
    return {
        "nlayer": layer.size,
        "nrow": y.size,
        "ncol": x.size,
        "coords": {"layer": layer, "y": y, "x": x},
    }