Coverage for C:\src\imod-python\imod\mf6\out\disv.py: 91%

221 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-08 13:27 +0200

1import os 

2import struct 

3from typing import Any, BinaryIO, Dict, List, Optional, Tuple 

4 

5import dask 

6import numba 

7import numpy as np 

8import scipy.sparse 

9import xarray as xr 

10import xugrid as xu 

11 

12from imod.mf6.utilities.dataset import assign_datetime_coords 

13 

14from . import cbc 

15from .common import FilePath, FloatArray, IntArray, _to_nan 

16 

17 

18def _ugrid_iavert_javert( 

19 iavert: IntArray, javert: IntArray 

20) -> Tuple[IntArray, IntArray]: 

21 # The node numbers of MODFLOW loop around: the first equals the last 

22 # We have to remove these for the UGRID conventions, which do not loop around. 

23 n = np.diff(iavert) - 1 

24 # This also takes care of 0-based indexing: 

25 ia = np.concatenate(([0], np.cumsum(n))) 

26 keep = np.ones_like(javert, dtype=bool) 

27 # -2: -1 for 1- to 0-based indexing, -1 to get rid of closing node. 

28 keep[iavert[1:] - 2] = False 

29 return ia, javert[keep] - 1 

30 

31 

def read_grb(f: BinaryIO, ntxt: int, lentxt: int) -> Dict[str, Any]:
    """
    Read the binary section of a DISV binary grid file.

    Parameters
    ----------
    f: BinaryIO
        Opened file, positioned at the start of the text definition lines.
    ntxt: int
        Number of text definition lines to skip.
    lentxt: int
        Length in bytes of a single text definition line.

    Returns
    -------
    Dict[str, Any]
        Grid topology ("grid"), the "top", "bottom", "idomain" and
        "icelltype" arrays as UgridDataArrays, the size information
        ("ncells", "nlayer", "ncells_per_layer", "nja"), the "layer"
        coordinate ("coords") and the raw cell connectivity ("ia", "ja").

    Raises
    ------
    ValueError
        If the number of cells does not equal the number of layers times the
        number of cells per layer.
    """

    def read_int() -> int:
        return struct.unpack("i", f.read(4))[0]

    def read_layered(dtype) -> np.ndarray:
        return np.fromfile(f, dtype, ncells).reshape((nlayer, ncells_per_layer))

    # The text lines that follow are definitions that aim to make the file
    # more portable. Nothing in them is needed here, so skip straight to the
    # binary data.
    f.seek(ntxt * lentxt, 1)

    ncells = read_int()
    nlayer = read_int()
    ncells_per_layer = read_int()
    nvert = read_int()
    njavert = read_int()
    nja = read_int()
    if nlayer * ncells_per_layer != ncells:
        raise ValueError(f"Invalid file {ncells} {nlayer} {ncells_per_layer}")

    struct.unpack("d", f.read(8))  # xorigin, not used
    struct.unpack("d", f.read(8))  # yorigin, not used
    f.seek(8, 1)  # skip angrot

    top_np = np.fromfile(f, np.float64, ncells_per_layer)
    bottom_np = read_layered(np.float64)
    vertices = np.fromfile(f, np.float64, nvert * 2).reshape((nvert, 2))
    np.fromfile(f, np.float64, ncells_per_layer)  # cellx, not used
    np.fromfile(f, np.float64, ncells_per_layer)  # celly, not used
    # Python is 0-based; MODFLOW6 is Fortran 1-based
    iavert = np.fromfile(f, np.int32, ncells_per_layer + 1)
    javert = np.fromfile(f, np.int32, njavert)
    ia = np.fromfile(f, np.int32, ncells + 1)
    ja = np.fromfile(f, np.int32, nja)
    idomain_np = read_layered(np.int32)
    icelltype_np = read_layered(np.int32)

    # Build the UGRID topology; -1 is the fill value handed to Ugrid2d.
    iavert, javert = _ugrid_iavert_javert(iavert, javert)
    face_nodes = scipy.sparse.csr_matrix((javert, javert, iavert))
    grid = xu.Ugrid2d(vertices[:, 0], vertices[:, 1], -1, face_nodes)
    facedim = grid.face_dimension

    coords = {"layer": np.arange(1, nlayer + 1)}
    dims = ("layer", facedim)

    def on_grid(values: np.ndarray, name: str) -> xu.UgridDataArray:
        return xu.UgridDataArray(xr.DataArray(values, coords, dims, name=name), grid)

    top = xr.DataArray(top_np, dims=[facedim], name="top")

    return {
        "distype": "disv",
        "grid": grid,
        "top": xu.UgridDataArray(top, grid),
        "bottom": on_grid(bottom_np, "bottom"),
        "coords": coords,
        "ncells": ncells,
        "nlayer": nlayer,
        "ncells_per_layer": ncells_per_layer,
        "nja": nja,
        "ia": ia,
        "ja": ja,
        "idomain": on_grid(idomain_np, "idomain"),
        "icelltype": on_grid(icelltype_np, "icelltype"),
    }

95 

96 

def read_times(
    path: FilePath, ntime: int, nlayer: int, ncells_per_layer: int
) -> FloatArray:
    """
    Read the total simulation time of every timestep in the file.

    Every layer is stored as a 52-byte header followed by
    ``ncells_per_layer`` 8-byte values. The total simulation time is the
    8-byte float at byte 16 of the header; only the header of the first layer
    of each timestep is read.

    Parameters
    ----------
    path: FilePath
    ntime: int
        Number of timesteps to read.
    nlayer: int
    ncells_per_layer: int

    Returns
    -------
    FloatArray of float64 with size ``ntime``
    """
    totim_offset = 16
    totim_size = 8
    record_size = 52 + ncells_per_layer * 8
    # Having consumed the time itself, the next one is a full timestep
    # (all layers) further on, minus the bytes just read.
    stride = nlayer * record_size - totim_size

    times = np.empty(ntime, dtype=np.float64)
    with open(path, "rb") as f:
        f.seek(totim_offset)
        for itime in range(ntime):
            (totim,) = struct.unpack("d", f.read(totim_size))
            times[itime] = totim
            f.seek(stride, 1)
    return times

123 

124 

def read_hds_timestep(
    path: FilePath, nlayer: int, ncells_per_layer: int, dry_nan: bool, pos: int
) -> FloatArray:
    """
    Read the values of all layers for a single timestep.

    Parameters
    ----------
    path: FilePath
    nlayer: int
    ncells_per_layer: int
    dry_nan: bool
        Passed on to ``_to_nan`` together with the values that were read.
    pos: int
        Byte offset in the file at which the timestep starts.

    Returns
    -------
    FloatArray
        The result of ``_to_nan`` applied to the (nlayer, ncells_per_layer)
        array of values.
    """
    values = np.empty((nlayer, ncells_per_layer), dtype=np.float64)
    with open(path, "rb") as f:
        f.seek(pos)
        for layer in range(nlayer):
            # Skip the 52-byte header (kstp, kper, pertime, ...) in front of
            # the data of every layer.
            f.seek(52, 1)
            values[layer] = np.fromfile(f, np.float64, ncells_per_layer)
    return _to_nan(values, dry_nan)

142 

143 

def open_hds(
    path: FilePath,
    grid_info: Dict[str, Any],
    dry_nan: bool,
    simulation_start_time: Optional[np.datetime64] = None,
    time_unit: Optional[str] = "d",
) -> xu.UgridDataArray:
    """
    Lazily open the values of a binary output file on a DISV grid.

    The simulation times are read immediately; the values themselves are
    wrapped in dask delayed objects, one per timestep.

    Parameters
    ----------
    path: FilePath
    grid_info: Dict[str, Any]
        Must contain the keys "grid", "nlayer", "ncells_per_layer", "coords"
        and "name". The dictionary and its "coords" entry are not modified.
    dry_nan: bool
        Passed on to ``read_hds_timestep``.
    simulation_start_time: np.datetime64, optional
        If given, ``assign_datetime_coords`` is applied to the result.
    time_unit: str, optional
        Passed on to ``assign_datetime_coords``. Default "d".

    Returns
    -------
    xu.UgridDataArray with dims ("time", "layer", face_dimension)
    """
    grid = grid_info["grid"]
    nlayer, ncells_per_layer = grid_info["nlayer"], grid_info["ncells_per_layer"]
    # Every layer is stored as a 52-byte header followed by 8-byte values.
    timestep_size = nlayer * (52 + ncells_per_layer * 8)
    ntime = os.path.getsize(path) // timestep_size
    times = read_times(path, ntime, nlayer, ncells_per_layer)
    # Copy before adding time: the caller's coords may be reused for arrays
    # that have no time dimension.
    coords = dict(grid_info["coords"])
    coords["time"] = times

    dask_list = []
    # loop over times and add delayed arrays
    for i in range(ntime):
        # TODO verify dimension order
        pos = i * timestep_size
        a = dask.delayed(read_hds_timestep)(
            path, nlayer, ncells_per_layer, dry_nan, pos
        )
        x = dask.array.from_delayed(
            a, shape=(nlayer, ncells_per_layer), dtype=np.float64
        )
        dask_list.append(x)

    daskarr = dask.array.stack(dask_list, axis=0)
    da = xr.DataArray(
        daskarr, coords, ("time", "layer", grid.face_dimension), name=grid_info["name"]
    )

    if simulation_start_time is not None:
        da = assign_datetime_coords(da, simulation_start_time, time_unit)
    return xu.UgridDataArray(da, grid)

180 

181 

def open_imeth1_budgets(
    cbc_path: FilePath, grb_content: dict, header_list: List[cbc.Imeth1Header]
) -> xu.UgridDataArray:
    """
    Open the data of an imeth==1 budget section on a DISV grid.

    The budgets are opened by ``cbc.open_imeth1_budgets`` and reshaped to a
    dense array covering the entire domain (all layers, all faces) for every
    timestep, using the shape information of the GRB file.

    Can be used for:

        * STO-SS
        * STO-SY
        * CSUB-CGELASTIC
        * CSUB-WATERCOMP

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: dict
    header_list: List[Imeth1Header]

    Returns
    -------
    xu.UgridDataArray with dims ("time", "layer", face_dimension)
    """
    grid = grb_content["grid"]
    dims = ("time", "layer", grid.face_dimension)
    layer_shape = (grb_content["nlayer"], grb_content["ncells_per_layer"])

    budgets = cbc.open_imeth1_budgets(cbc_path, header_list)
    time = budgets["time"]
    # Merging creates a new dictionary: grb_content["coords"] stays untouched.
    coords = grb_content["coords"] | {"time": time}
    dense = budgets.data.reshape((time.size, *layer_shape))

    da = xr.DataArray(data=dense, coords=coords, dims=dims, name=None)
    return xu.UgridDataArray(da, grid)

225 

226 

def open_imeth6_budgets(
    cbc_path: FilePath,
    grb_content: dict,
    header_list: List[cbc.Imeth6Header],
    return_variable: str = "budget",
) -> xu.UgridDataArray:
    """
    Open the data for an imeth==6 budget section on a DISV grid.

    Uses the information of the GRB file to create properly sized dense
    arrays (which store the entire domain), one lazily read array per header.
    Doing so ignores the boundary condition internal index (id2).

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: dict
        Must contain "grid", "nlayer", "ncells_per_layer" and "coords". The
        dictionary and its "coords" entry are not modified.
    header_list: List[Imeth6Header]
        One header per timestep; the auxiliary column names and the name of
        the result are taken from the first header.
    return_variable: str
        Passed on to ``cbc.read_imeth6_budgets_dense``. Default "budget".

    Returns
    -------
    xu.UgridDataArray with dims ("time", "layer", face_dimension)
    """
    # Record layout of the list-based budget entries, including any
    # auxiliary columns.
    dtype = np.dtype(
        [("id1", np.int32), ("id2", np.int32), ("budget", np.float64)]
        + [(name, np.float64) for name in header_list[0].auxtxt]
    )
    # Allocates dense arrays for the entire model domain
    shape = (grb_content["nlayer"], grb_content["ncells_per_layer"])
    # np.product was deprecated and removed in NumPy 2.0; np.prod is the
    # supported spelling.
    size = np.prod(shape)
    dask_list = []
    time = np.empty(len(header_list), dtype=np.float64)
    for i, header in enumerate(header_list):
        time[i] = header.totim
        a = dask.delayed(cbc.read_imeth6_budgets_dense)(
            cbc_path, header.nlist, dtype, header.pos, size, shape, return_variable
        )
        x = dask.array.from_delayed(a, shape=shape, dtype=np.float64)
        dask_list.append(x)

    daskarr = dask.array.stack(dask_list, axis=0)
    # Copy before adding time: grb_content["coords"] is shared with every
    # other array built from the same GRB content, including ones that have
    # no time dimension.
    coords = dict(grb_content["coords"])
    coords["time"] = time
    name = header_list[0].text
    grid = grb_content["grid"]
    da = xr.DataArray(
        daskarr, coords, ("time", "layer", grid.face_dimension), name=name
    )
    return xu.UgridDataArray(da, grid)

276 

277 

@numba.njit
def disv_lower_index(
    ia: IntArray,
    ja: IntArray,
    ncells: int,
    nlayer: int,
    ncells_per_layer: int,
) -> IntArray:
    """
    Find, for every cell, the position in ``ja`` of its downward connection.

    Parameters
    ----------
    ia: IntArray
        1-based index pointer into ``ja``, one entry per cell plus one.
    ja: IntArray
        1-based cell numbers of the connected cells.
    ncells: int
    nlayer: int
    ncells_per_layer: int

    Returns
    -------
    IntArray of int64 with shape (nlayer, ncells_per_layer)
        0-based position in ``ja``; -1 where no downward connection was found.
    """
    lower = np.full(ncells, -1, np.int64)
    for i in range(ncells):
        # python is 0-based, modflow6 is 1-based: shift both pointer ends
        for nzi in range(ia[i] - 1, ia[i + 1] - 1):
            # Difference in (0-based) cell number with the connected cell.
            d = ja[nzi] - 1 - i
            if d == ncells_per_layer:
                # Exactly one layer down: the lower neighbor.
                lower[i] = nzi
            elif d > ncells_per_layer:
                # Skips at least one layer: must be pass through. The cell
                # itself and the passed cells below it get this connection.
                npassed = int(d / ncells_per_layer)
                for ipass in range(npassed):
                    lower[i + ipass * ncells_per_layer] = nzi
            # d < ncells_per_layer: upper, diagonal, or horizontal; ignored.

    return lower.reshape(nlayer, ncells_per_layer)

302 

303 

def expand_indptr(ia: IntArray):
    """
    Expand a CSR index pointer into one row number per stored entry.

    Row ``k`` is repeated ``ia[k + 1] - ia[k]`` times, so the result lines up
    element by element with the column index array that ``ia`` points into.
    """
    entries_per_row = np.diff(ia)
    row_numbers = np.arange(ia.size - 1)
    return np.repeat(row_numbers, entries_per_row)

307 

308 

def disv_horizontal_index(
    ia: IntArray,
    ja: IntArray,
    nlayer: int,
    ncells_per_layer: int,
    edge_face_connectivity: IntArray,
    fill_value: int,
    face_coordinates: FloatArray,
):
    """
    Find, for every edge and layer, the position in ``ja`` of the horizontal
    connection across that edge, plus the unit vector between the two faces.

    Parameters
    ----------
    ia: IntArray
        1-based index pointer into ``ja``.
    ja: IntArray
        1-based cell numbers of the connected cells.
    nlayer: int
    ncells_per_layer: int
    edge_face_connectivity: IntArray
        Two face numbers per edge; the second one equals ``fill_value`` for
        edges that do not connect two faces.
    fill_value: int
    face_coordinates: FloatArray
        x and y coordinate of every face.

    Returns
    -------
    horizontal: array with shape (nlayer, nedge)
        0-based position in ``ja``; -1 for edges without a connection.
    u: array of floats with shape (nedge,)
        x-component of the unit vector; NaN for edges without a connection.
    v: array of floats with shape (nedge,)
        y-component of the unit vector; NaN for edges without a connection.
    """
    nedge = len(edge_face_connectivity)

    # A connection i -> j is horizontal when j has a higher number than i,
    # but still lies within the same layer.
    from_cell = expand_indptr(ia)
    offset = (ja - 1) - from_cell
    is_horizontal = (offset > 0) & (offset < ncells_per_layer)
    position = np.arange(ja.size)[is_horizontal].reshape((nlayer, -1))

    # i -> j is pre-sorted (required by CSR structure) and the edge_faces are
    # repeated per layer. Since i -> j is sorted by face number, it suffices
    # to find the rank of every connecting edge in that same ordering.
    is_connection = edge_face_connectivity[:, 1] != fill_value
    edge_faces = edge_face_connectivity[is_connection]
    rank = np.argsort(np.lexsort(edge_faces.T[::-1]))

    # Reshuffle every layer into edge order and store the result.
    horizontal = np.full((nlayer, nedge), -1)
    horizontal[:, is_connection] = position[:, rank]

    # Unit components (x: u, y: v), pointing from the lower to the higher
    # numbered face of every connecting edge.
    ordered_faces = np.sort(edge_faces, axis=1)
    xy = face_coordinates[ordered_faces]
    dx = xy[:, 1, 0] - xy[:, 0, 0]
    dy = xy[:, 1, 1] - xy[:, 0, 1]
    length = np.sqrt(dx**2 + dy**2)
    u = np.full(nedge, np.nan)
    v = np.full(nedge, np.nan)
    u[is_connection] = dx / length
    v[is_connection] = dy / length
    return horizontal, u, v

352 

353 

def disv_to_horizontal_lower_indices(
    grb_content: dict,
) -> Tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]:
    """
    Compute the index arrays needed to extract face flows from flow-ja-face.

    Parameters
    ----------
    grb_content: dict
        Must contain "grid", "ia", "ja", "ncells", "nlayer",
        "ncells_per_layer" and "coords".

    Returns
    -------
    horizontal: xr.DataArray with dims ("layer", edge_dimension)
        Index of the horizontal connection of every edge, as computed by
        ``disv_horizontal_index``.
    u: xr.DataArray with dims (edge_dimension,)
        x-component of the unit vector of every edge.
    v: xr.DataArray with dims (edge_dimension,)
        y-component of the unit vector of every edge.
    lower: xr.DataArray with dims ("layer", face_dimension)
        Index of the downward connection of every cell, as computed by
        ``disv_lower_index``.
    """
    grid = grb_content["grid"]
    horizontal, u, v = disv_horizontal_index(
        ia=grb_content["ia"],
        ja=grb_content["ja"],
        nlayer=grb_content["nlayer"],
        ncells_per_layer=grb_content["ncells_per_layer"],
        edge_face_connectivity=grid.edge_face_connectivity,
        fill_value=grid.fill_value,
        face_coordinates=grid.face_coordinates,
    )
    lower = disv_lower_index(
        ia=grb_content["ia"],
        ja=grb_content["ja"],
        ncells=grb_content["ncells"],
        nlayer=grb_content["nlayer"],
        ncells_per_layer=grb_content["ncells_per_layer"],
    )

    return (
        xr.DataArray(
            horizontal, grb_content["coords"], dims=["layer", grid.edge_dimension]
        ),
        xr.DataArray(u, dims=[grid.edge_dimension]),
        xr.DataArray(v, dims=[grid.edge_dimension]),
        xr.DataArray(lower, grb_content["coords"], dims=["layer", grid.face_dimension]),
    )

385 

386 

def disv_extract_lower_budget(
    budgets: xr.DataArray, index: xr.DataArray
) -> xr.DataArray:
    """
    Grab the flows through the lower faces from the flow-ja-face array.

    Parameters
    ----------
    budgets: xr.DataArray of floats
        flow-ja-face array with a "time" coordinate and a "linear_index"
        dimension.
    index: xr.DataArray of ints
        index array with dims ("layer", face_dimension); negative values mark
        cells without a lower connection.

    Returns
    -------
    xr.DataArray of floats with dims ("time", "layer", face_dimension)
        Contains 0.0 wherever ``index`` is negative.
    """
    time = budgets["time"]
    face_dimension = index.dims[-1]

    # isel with a 3D array is extremely slow; selecting with the flattened
    # index and reshaping the result afterwards is much faster.
    flat_selection = budgets.isel(linear_index=index.values.ravel()).data
    reshaped = flat_selection.reshape((time.size, *index.shape))

    coords = dict(index.coords)
    coords["time"] = time
    da = xr.DataArray(
        data=reshaped,
        coords=coords,
        dims=("time", "layer", face_dimension),
        name="flow-ja-face",
    )
    # A negative index has selected an arbitrary value: replace it by zero.
    return da.where(index >= 0, other=0.0)

403 

404 

def disv_extract_horizontal_budget(
    budgets: xr.DataArray, index: xr.DataArray
) -> xr.DataArray:
    """
    Grab horizontal flows from the flow-ja-face array.

    A single .isel() indexing operation could do this as well, but is
    extremely slow in this case, which seems to be an xarray issue.

    Parameters
    ----------
    budgets: xr.DataArray of floats
        flow-ja-face array, dims ("time", "linear_index")
        The linear index enumerates cell-to-cell connections in this case, not
        the individual cells.
    index: xr.DataArray of ints
        index array with dims ("layer", edge_dimension); negative values mark
        edges without a connection.

    Returns
    -------
    xr.DataArray of floats with dims ("time", "layer", edge_dimension)
        Contains 0.0 wherever ``index`` is negative.
    """
    time = budgets["time"]
    edge_dimension = index.dims[-1]

    # Select with the flattened index and reshape afterwards: much faster
    # than isel with a multi-dimensional index.
    flat_selection = budgets.isel(linear_index=index.values.ravel()).data
    reshaped = flat_selection.reshape((time.size, *index.shape))

    coords = dict(index.coords)
    coords["time"] = time
    da = xr.DataArray(
        data=reshaped,
        coords=coords,
        dims=("time", "layer", edge_dimension),
        name="flow-ja-face",
    )
    # A negative index has selected an arbitrary value: replace it by zero.
    return da.where(index >= 0, other=0.0)

440 

441 

def disv_open_face_budgets(
    cbc_path: FilePath, grb_content: dict, header_list: List[cbc.Imeth1Header]
) -> Tuple[
    xu.UgridDataArray, xu.UgridDataArray, xu.UgridDataArray, xu.UgridDataArray
]:
    """
    Open the flow-ja-face budgets as flows over the horizontal and lower faces.

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: dict
    header_list: List[Imeth1Header]

    Returns
    -------
    horizontal: xu.UgridDataArray
        Flow over every edge, dims ("time", "layer", edge_dimension).
    flow_x: xu.UgridDataArray
        Negated horizontal flow multiplied by the x-component of the unit
        vector of every edge.
    flow_y: xu.UgridDataArray
        Negated horizontal flow multiplied by the y-component of the unit
        vector of every edge.
    lower: xu.UgridDataArray
        Flow over the lower face, dims ("time", "layer", face_dimension).
    """
    horizontal_index, u, v, lower_index = disv_to_horizontal_lower_indices(grb_content)
    budgets = cbc.open_imeth1_budgets(cbc_path, header_list)
    horizontal = disv_extract_horizontal_budget(budgets, horizontal_index)
    # Use the dedicated lower-face extractor for the face-based index.
    lower = disv_extract_lower_budget(budgets, lower_index)
    flow_x = -horizontal * u
    flow_y = -horizontal * v
    grid = grb_content["grid"]
    return (
        xu.UgridDataArray(horizontal, grid),
        xu.UgridDataArray(flow_x, grid),
        xu.UgridDataArray(flow_y, grid),
        xu.UgridDataArray(lower, grid),
    )

458 

459 

def open_cbc(
    cbc_path: FilePath,
    grb_content: Dict[str, Any],
    flowja: bool = False,
    simulation_start_time: Optional[np.datetime64] = None,
    time_unit: Optional[str] = "d",
) -> Dict[str, xu.UgridDataArray]:
    """
    Open all budget terms of a cell-by-cell budget file on a DISV grid.

    Parameters
    ----------
    cbc_path: str, pathlib.Path
    grb_content: dict
        Content of the binary grid file, as returned by ``read_grb``.
    flowja: bool
        If True, the "flow-ja-face" term is stored as returned by
        ``cbc.open_face_budgets_as_flowja``, under the keys "flow-ja-face"
        and "connectivity". If False (default), it is split into
        "flow-horizontal-face", "flow-horizontal-face-x",
        "flow-horizontal-face-y" and "flow-lower-face".
    simulation_start_time: np.datetime64, optional
        If given, ``assign_datetime_coords`` is applied to every entry.
    time_unit: str, optional
        Passed on to ``assign_datetime_coords``. Default "d".

    Returns
    -------
    Dict[str, xu.UgridDataArray]
        One entry per budget term. For imeth==6 terms whose text starts with
        "data-", one entry per auxiliary variable is created instead.
    """
    headers = cbc.read_cbc_headers(cbc_path)
    cbc_content = {}
    for key, header_list in headers.items():
        first_header = header_list[0]
        if key == "flow-ja-face":
            if flowja:
                # Use separate names for the results, so that the boolean
                # ``flowja`` argument is not overwritten.
                flowja_budgets, connectivity = cbc.open_face_budgets_as_flowja(
                    cbc_path, header_list, grb_content
                )
                cbc_content["flow-ja-face"] = flowja_budgets
                cbc_content["connectivity"] = connectivity
            else:
                flow_xy, flow_x, flow_y, lower = disv_open_face_budgets(
                    cbc_path, grb_content, header_list
                )
                cbc_content["flow-horizontal-face"] = flow_xy
                cbc_content["flow-horizontal-face-x"] = flow_x
                cbc_content["flow-horizontal-face-y"] = flow_y
                cbc_content["flow-lower-face"] = lower
        elif isinstance(first_header, cbc.Imeth1Header):
            cbc_content[key] = open_imeth1_budgets(cbc_path, grb_content, header_list)
        elif isinstance(first_header, cbc.Imeth6Header):
            # for non cell flow budget terms, use auxiliary variables as return value
            if first_header.text.startswith("data-"):
                for return_variable in first_header.auxtxt:
                    key_aux = first_header.txt2id1 + "-" + return_variable
                    cbc_content[key_aux] = open_imeth6_budgets(
                        cbc_path, grb_content, header_list, return_variable
                    )
            else:
                cbc_content[key] = open_imeth6_budgets(
                    cbc_path, grb_content, header_list
                )

    if simulation_start_time is not None:
        for cbc_name, cbc_array in cbc_content.items():
            cbc_content[cbc_name] = assign_datetime_coords(
                cbc_array, simulation_start_time, time_unit
            )
    return cbc_content

506 

507 

def grid_info(like: xu.UgridDataArray) -> Dict[str, Any]:
    """
    Derive the grid information dictionary from an example array.

    Parameters
    ----------
    like: xu.UgridDataArray
        Must have a "layer" coordinate and a face dimension coordinate.

    Returns
    -------
    Dict[str, Any]
        The name "head", the number of layers, the number of cells per layer,
        and the "layer" and face dimension coordinates of ``like``.
    """
    facedim = like.ugrid.grid.face_dimension
    layer = like["layer"]
    faces = like[facedim]
    coords = {"layer": layer, facedim: faces}
    return {
        "name": "head",
        "nlayer": layer.size,
        "ncells_per_layer": faces.size,
        "coords": coords,
    }