
1""" 

2Common methods used for interpolation, voxelization. 

3 

4Includes methods for dealing with different coordinates and dimensions of the 

5xarray.DataArrays, as well as aggregation methods operating on weights and 

6values. 

7""" 

8 

9import cftime 

10import numba 

11import numpy as np 

12 

13import imod 

14 

15 


@numba.njit
def _starts(src_x, dst_x):
    """
    Yield, for every destination cell, the index of the source cell at which
    overlap starts, for a single dimension.

    Parameters
    ----------
    src_x : np.array
        vertex coordinates of source
    dst_x : np.array
        vertex coordinates of destination
    """
    i = 0
    j = 0
    while i < dst_x.size - 1:
        x = dst_x[i]
        while j < src_x.size:
            if src_x[j] > x:
                out = max(j - 1, 0)
                yield (i, out)
                break
            else:
                j += 1
        i += 1
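
# Illustrative sketch (not part of the original module): for each destination
# cell, _starts yields the source cell index at which overlap begins. numba
# generators compiled in nopython mode can be consumed from plain Python.
# >>> src_x = np.array([0.0, 10.0, 20.0, 30.0])
# >>> dst_x = np.array([0.0, 15.0, 30.0])
# >>> list(_starts(src_x, dst_x))
# [(0, 0), (1, 1)]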


def _weights_1d(src_x, dst_x, use_relative_weights=False):
    """
    Calculate regridding weights and indices for a single dimension.

    Parameters
    ----------
    src_x : np.array
        vertex coordinates of source
    dst_x : np.array
        vertex coordinates of destination
    use_relative_weights : bool
        divide the weights by the source cell width if True

    Returns
    -------
    max_len : int
        maximum number of source cells to a single destination cell for this
        dimension
    (dst_inds, src_inds, weights) : tuple of np.array
        destination cell index; source cell indices per destination index;
        weight of each source cell per destination index
    """
    max_len = 0
    dst_inds = []
    src_inds = []
    weights = []
    rel_weights = []

    # i is the index of dst
    # j is the index of src
    for i, j in _starts(src_x, dst_x):
        dst_x0 = dst_x[i]
        dst_x1 = dst_x[i + 1]

        _inds = []
        _weights = []
        _rel_weights = []
        has_value = False
        while j < src_x.size - 1:
            src_x0 = src_x[j]
            src_x1 = src_x[j + 1]
            overlap = _overlap((dst_x0, dst_x1), (src_x0, src_x1))
            # No longer any overlap: continue to the next dst cell
            if overlap == 0:
                break
            else:
                has_value = True
                _inds.append(j)
                _weights.append(overlap)
                relative_overlap = overlap / (src_x1 - src_x0)
                _rel_weights.append(relative_overlap)
                j += 1
        if has_value:
            dst_inds.append(i)
            src_inds.append(_inds)
            weights.append(_weights)
            rel_weights.append(_rel_weights)
            # Save the max number of source cells,
            # so we know how much to pre-allocate later on.
            inds_len = len(_inds)
            if inds_len > max_len:
                max_len = inds_len

    # Convert all output to numpy arrays.
    # numba does NOT like arrays or lists in tuples:
    # compilation time goes through the roof.
    nrow = len(dst_inds)
    ncol = max_len
    np_dst_inds = np.array(dst_inds)

    np_src_inds = np.full((nrow, ncol), -1)
    for i in range(nrow):
        for j, ind in enumerate(src_inds[i]):
            np_src_inds[i, j] = ind

    np_weights = np.full((nrow, ncol), 0.0)
    if use_relative_weights:
        weights = rel_weights
    for i in range(nrow):
        for j, w in enumerate(weights[i]):
            np_weights[i, j] = w

    return max_len, (np_dst_inds, np_src_inds, np_weights)
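
# Worked example (illustrative, not part of the original module): with the
# vertices below, two source cells overlap each destination cell.
# >>> src_x = np.array([0.0, 10.0, 20.0, 30.0])
# >>> dst_x = np.array([0.0, 15.0, 30.0])
# >>> max_len, (dst_inds, src_inds, weights) = _weights_1d(src_x, dst_x)
# max_len is 2; dst_inds is [0, 1]; src_inds is [[0, 1], [1, 2]]; weights is
# [[10.0, 5.0], [5.0, 10.0]]. With use_relative_weights=True, the weights
# would instead be divided by the source cell widths (here: 10.0).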


def _reshape(src, dst, ndim_regrid):
    """
    If ndim > ndim_regrid, the non-regridding dimensions are combined into
    a single dimension, so we can use a single loop, irrespective of the
    total number of dimensions.
    (The alternative is pre-writing N for-loops for every N dimensions we
    intend to support.)
    If ndim == ndim_regrid, all dimensions will be used in regridding;
    in that case no looping over other dimensions is required and we add
    a dummy dimension here so there's something to iterate over.
    """
    src_shape = src.shape
    dst_shape = dst.shape
    ndim = len(src_shape)

    if ndim == ndim_regrid:
        n_iter = 1
    else:
        n_iter = int(np.prod(src_shape[:-ndim_regrid]))

    src_itershape = (n_iter, *src_shape[-ndim_regrid:])
    dst_itershape = (n_iter, *dst_shape[-ndim_regrid:])

    iter_src = np.reshape(src, src_itershape)
    iter_dst = np.reshape(dst, dst_itershape)

    return iter_src, iter_dst
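
# Shape sketch (illustrative, not part of the original module): regridding
# the last two dimensions collapses the leading dimensions into one loop axis.
# >>> src = np.zeros((2, 3, 4, 5))
# >>> dst = np.zeros((2, 3, 6, 7))
# >>> iter_src, iter_dst = _reshape(src, dst, ndim_regrid=2)
# >>> iter_src.shape, iter_dst.shape
# ((6, 4, 5), (6, 6, 7))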


def _is_subset(a1, a2):
    if np.isin(a2, a1).all():
        # This means all values of a2 are present in a1.
        # Now check whether it's an actual (contiguous) subset:
        # generate indices, and fetch only those present.
        idx = np.arange(a1.size)[np.isin(a1, a2)]
        if idx.size > 1:
            increment = np.diff(idx)
            # If the maximum increment is only 1, it's a contiguous subset
            if increment.max() == 1:
                return True
    return False
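
# Illustrative check (not part of the original module): a2 must occur in a1
# as a contiguous run to count as a subset.
# >>> _is_subset(np.array([1, 2, 3, 4]), np.array([2, 3]))
# True
# >>> _is_subset(np.array([1, 2, 3, 4]), np.array([2, 4]))
# False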


def _match_dims(src, like):
    """
    Parameters
    ----------
    src : xr.DataArray
        The source DataArray to be regridded
    like : xr.DataArray
        Example DataArray that shows what the resampled result should look like
        in terms of coordinates. `src` is regridded along dimensions of `like`
        that have the same name, but have different values.

    Returns
    -------
    matching_dims, regrid_dims, add_dims : tuple of lists
        matching_dims: dimensions along which the coordinates match exactly,
        or form a contiguous subset
        regrid_dims: dimensions along which src will be regridded
        add_dims: dimensions that are not present in like
    """
    # TODO: deal with different extent?
    # Do another check if not identical
    # Check if subset or superset?
    matching_dims = []
    regrid_dims = []
    add_dims = []
    for dim in src.dims:
        if dim not in like.dims:
            add_dims.append(dim)
        elif src[dim].size == 0:  # zero overlap
            regrid_dims.append(dim)
        else:
            try:
                a1 = _coord(src, dim)
                a2 = _coord(like, dim)
                if np.array_equal(a1, a2) or _is_subset(a1, a2):
                    matching_dims.append(dim)
                else:
                    regrid_dims.append(dim)
            except TypeError:
                first_type = type(like[dim].values[0])
                if issubclass(first_type, (cftime.datetime, np.datetime64)):
                    raise RuntimeError(
                        "cannot regrid over datetime dimensions. "
                        "Use xarray.Dataset.resample() instead"
                    )

    ndim_regrid = len(regrid_dims)
    # Check the number of dimensions to regrid
    if ndim_regrid > 3:
        raise NotImplementedError("cannot regrid over more than three dimensions")

    return matching_dims, regrid_dims, add_dims
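
# Illustrative sketch (not part of the original module; assumes xarray is
# imported): "y" coordinates match, "x" differs so it must be regridded, and
# "layer" does not occur in like.
# >>> import xarray as xr
# >>> src = xr.DataArray(
# ...     np.zeros((2, 2, 4)),
# ...     {"layer": [1, 2], "y": [0.5, 1.5], "x": [0.5, 1.5, 2.5, 3.5]},
# ...     ["layer", "y", "x"],
# ... )
# >>> like = xr.DataArray(
# ...     np.zeros((2, 2)), {"y": [0.5, 1.5], "x": [1.0, 3.0]}, ["y", "x"]
# ... )
# >>> _match_dims(src, like)
# (['y'], ['x'], ['layer'])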


def _increasing_dims(da, dims):
    flip_dims = []
    for dim in dims:
        if not da.indexes[dim].is_monotonic_increasing:
            flip_dims.append(dim)
            da = da.isel({dim: slice(None, None, -1)})
    return da, flip_dims


def _selection_indices(src_x, xmin, xmax, extra_overlap):
    """Left-inclusive"""
    # Extra overlap is needed, for example with (multi)linear interpolation.
    # We simply enlarge the slice at the start and at the end.
    i0 = max(0, np.searchsorted(src_x, xmin, side="right") - 1 - extra_overlap)
    i1 = np.searchsorted(src_x, xmax, side="left") + extra_overlap
    return i0, i1
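
# Illustrative sketch (not part of the original module): source cells whose
# vertices cover [12, 28], optionally widened by extra_overlap (indices shown
# as plain ints).
# >>> src_x = np.array([0.0, 10.0, 20.0, 30.0, 40.0])
# >>> _selection_indices(src_x, 12.0, 28.0, extra_overlap=0)
# (1, 3)
# >>> _selection_indices(src_x, 12.0, 28.0, extra_overlap=1)
# (0, 4)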


def _slice_src(src, like, extra_overlap):
    """
    Slice src to the extent of like, along both the matching dims and the
    dims that have to be regridded, optionally with extra overlap (e.g. for
    interpolation).
    """
    matching_dims, regrid_dims, _ = _match_dims(src, like)
    dims = matching_dims + regrid_dims

    slices = {}
    for dim in dims:
        # Generate vertices
        src_x = _coord(src, dim)
        _, xmin, xmax = imod.util.spatial.coord_reference(like[dim])
        i0, i1 = _selection_indices(src_x, xmin, xmax, extra_overlap)
        slices[dim] = slice(i0, i1)
    return src.isel(slices)


def _dst_coords(src, like, dims_from_src, dims_from_like):
    """
    Gather destination coordinates
    """
    dst_da_coords = {}
    dst_shape = []
    # TODO: do some more checking, more robust handling
    like_coords = dict(like.coords)
    for dim in dims_from_src:
        try:
            like_coords.pop(dim)
        except KeyError:
            pass
        dst_da_coords[dim] = src[dim].values
        dst_shape.append(src[dim].size)
    for dim in dims_from_like:
        try:
            like_coords.pop(dim)
        except KeyError:
            pass
        dst_da_coords[dim] = like[dim].values
        dst_shape.append(like[dim].size)

    dst_da_coords.update(like_coords)
    return dst_da_coords, dst_shape


def _check_monotonic(dxs, dim):
    # Use xor to check it's one or the other: all positive, or all negative
    if not ((dxs > 0.0).all() ^ (dxs < 0.0).all()):
        raise ValueError(f"{dim} is not only increasing or only decreasing")


def _set_cellsizes(da, dims):
    for dim in dims:
        dx_string = f"d{dim}"
        if dx_string not in da.coords:
            dx, _, _ = imod.util.spatial.coord_reference(da.coords[dim])
            if isinstance(dx, (int, float)):
                dx = np.full(da.coords[dim].size, dx)
            da = da.assign_coords({dx_string: (dim, dx)})
    return da


def _set_scalar_cellsizes(da):
    for dim in da.dims:
        dx_string = f"d{dim}"
        if dx_string in da.coords:
            dx = da.coords[dx_string]
            # Ensure no leftover coordinates in scalar
            if dx.ndim == 0:  # Catch the case where dx already is a scalar
                dx_scalar = dx.values[()]
            else:
                dx_scalar = dx.values[0]
            if np.allclose(dx, dx_scalar):
                da = da.assign_coords({dx_string: dx_scalar})
    return da


def _coord(da, dim):
    """
    Transform N xarray midpoints into N + 1 vertex edges
    """
    delta_dim = "d" + dim  # e.g. dx, dy, dz, etc.

    # If empty array, return empty
    if da[dim].size == 0:
        return np.array(())

    if delta_dim in da.coords:  # equidistant or non-equidistant
        dx = da[delta_dim].values
        if dx.shape == () or dx.shape == (1,):  # scalar -> equidistant
            dxs = np.full(da[dim].size, dx)
        else:  # array -> non-equidistant
            dxs = dx
        _check_monotonic(dxs, dim)
    else:  # not defined -> equidistant
        if da[dim].size == 1:
            raise ValueError(
                f"DataArray has size 1 along {dim}, so cellsize must be provided"
                " as a coordinate."
            )
        dxs = np.diff(da[dim].values)
        dx = dxs[0]
        atolx = abs(1.0e-4 * dx)
        # Pass the tolerance as atol: the third positional argument of
        # np.allclose is rtol, which is not what is intended here.
        if not np.allclose(dxs, dx, atol=atolx):
            raise ValueError(
                f"DataArray has to be equidistant along {dim}, or cellsizes"
                " must be provided as a coordinate."
            )
        dxs = np.full(da[dim].size, dx)

    dxs = np.abs(dxs)
    x = da[dim].values
    if not da.indexes[dim].is_monotonic_increasing:
        x = x[::-1]
        dxs = dxs[::-1]

    # This assumes the coordinate to be monotonic increasing
    x0 = x[0] - 0.5 * dxs[0]
    x = np.full(dxs.size + 1, x0)
    x[1:] += np.cumsum(dxs)
    return x
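
# Illustrative sketch (not part of the original module; assumes xarray is
# imported): three equidistant midpoints yield four vertex edges.
# >>> import xarray as xr
# >>> da = xr.DataArray(np.zeros(3), {"x": [0.5, 1.5, 2.5]}, ["x"])
# >>> _coord(da, "x")
# array([0., 1., 2., 3.])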


def _define_single_dim_slices(src_x, dst_x, chunksizes):
    n = len(chunksizes)
    if not n > 0:
        raise ValueError("chunksizes must contain at least one chunk")
    if n == 1:
        return [slice(None, None)]

    chunk_indices = np.full(n + 1, 0)
    chunk_indices[1:] = np.cumsum(chunksizes)
    # Find the locations to cut.
    src_chunk_x = src_x[chunk_indices]
    if dst_x[0] < src_chunk_x[0]:
        src_chunk_x[0] = dst_x[0]
    if dst_x[-1] > src_chunk_x[-1]:
        src_chunk_x[-1] = dst_x[-1]
    # Destinations should NOT have any overlap;
    # sources may have overlap.
    # We find the most suitable places to cut.
    dst_i = np.searchsorted(dst_x, src_chunk_x, side="left")
    dst_i[dst_i > dst_x.size - 1] = dst_x.size - 1

    # Create slices, but only if start and end are different
    # (otherwise, the slice would be empty)
    dst_slices = [slice(s, e) for s, e in zip(dst_i[:-1], dst_i[1:]) if s != e]
    return dst_slices


def _define_slices(src, like):
    """
    Defines the slices for every dimension, based on the chunks that are
    present within src.

    First, we get a single list of chunks per dimension.
    Next, these are expanded into an N-dimensional array, where N equals the
    number of dimensions that have chunks.
    Finally, these arrays are ravelled, and stacked for easier iteration.
    """
    dst_dim_slices = []
    dst_chunks_shape = []
    for dim, chunksizes in zip(src.dims, src.chunks):
        if dim in like.dims:
            dst_slices = _define_single_dim_slices(
                _coord(src, dim), _coord(like, dim), chunksizes
            )
            dst_dim_slices.append(dst_slices)
            dst_chunks_shape.append(len(dst_slices))

    dst_expanded_slices = np.stack(
        [a.ravel() for a in np.meshgrid(*dst_dim_slices, indexing="ij")], axis=-1
    )
    return dst_expanded_slices, dst_chunks_shape


def _sel_chunks(da, dims, expanded_slices):
    """
    Using the slices created with the functions above, use xarray's index
    selection methods to create a list of "like" DataArrays which are used
    to inform the regridding. During the regrid() call of the
    imod.prepare.Regridder object, data from the input array is selected,
    ideally one chunk at a time, or 2 ** ndim_chunks if there is overlap
    required due to cellsize differences.
    """
    das = []
    for dim_slices in expanded_slices:
        slice_dict = {}
        for dim, dim_slice in zip(dims, dim_slices):
            slice_dict[dim] = dim_slice
        das.append(da.isel(**slice_dict))
    return das


def _get_method(method, methods):
    if isinstance(method, str):
        try:
            _method = methods[method]
        except KeyError as e:
            raise ValueError(
                "Invalid regridding method. Available methods are: {}".format(
                    methods.keys()
                )
            ) from e
    elif callable(method):
        _method = method
    else:
        raise TypeError("method must be a string or a callable")
    return _method
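
# Illustrative usage (not part of the original module): methods are resolved
# by name via the METHODS mapping defined at the bottom of this module;
# unknown names raise a ValueError listing the available methods.
# >>> _get_method("mean", METHODS) is mean
# True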


@numba.njit
def _overlap(a, b):
    # Width of the overlap of intervals (a0, a1) and (b0, b1); 0 if disjoint
    return max(0, min(a[1], b[1]) - max(a[0], b[0]))
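
# Illustrative check (not part of the original module): the overlap width of
# the intervals (0, 10) and (5, 20) is 5.
# >>> _overlap((0.0, 10.0), (5.0, 20.0))
# 5.0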


def mean(values, weights):
    vsum = 0.0
    wsum = 0.0
    for i in range(values.size):
        v = values[i]
        w = weights[i]
        if np.isnan(v):
            continue
        vsum += w * v
        wsum += w
    if wsum == 0:
        return np.nan
    else:
        return vsum / wsum
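
# Illustrative check (not part of the original module): NaNs are skipped and
# the remaining values are weight-averaged.
# >>> mean(np.array([1.0, np.nan, 3.0]), np.array([1.0, 1.0, 1.0]))
# 2.0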


def harmonic_mean(values, weights):
    v_agg = 0.0
    w_sum = 0.0
    for i in range(values.size):
        v = values[i]
        w = weights[i]
        if np.isnan(v) or v == 0:
            continue
        if w > 0:
            w_sum += w
            v_agg += w / v
    if v_agg == 0 or w_sum == 0:
        return np.nan
    else:
        return w_sum / v_agg


def geometric_mean(values, weights):
    v_agg = 0.0
    w_sum = 0.0

    # Compute the sum to normalize weights, to avoid tiny or huge values in exp
    normsum = 0.0
    for i in range(values.size):
        normsum += weights[i]
    # Early return if no values
    if normsum == 0:
        return np.nan

    for i in range(values.size):
        w = weights[i] / normsum
        v = values[i]
        # Skip if v == 0, v is NaN, or w == 0 (no contribution)
        if v > 0 and w > 0:
            v_agg += w * np.log(abs(v))
            w_sum += w
        # Do not reduce over negative values: would require complex numbers.
        elif v < 0:
            return np.nan

    if w_sum == 0:
        return np.nan
    else:
        return np.exp((1.0 / w_sum) * v_agg)
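
# Illustrative check (not part of the original module): with equal weights
# this reduces to the ordinary geometric mean, up to floating point error.
# >>> geometric_mean(np.array([2.0, 8.0]), np.array([1.0, 1.0]))
# 4.0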


def sum(values, weights):
    # NOTE: shadows the built-in sum; referenced by name via METHODS below.
    v_sum = 0.0
    w_sum = 0.0
    for i in range(values.size):
        v = values[i]
        w = weights[i]
        if np.isnan(v):
            continue
        v_sum += v
        w_sum += w
    if w_sum == 0:
        return np.nan
    else:
        return v_sum


def minimum(values, weights):
    return np.nanmin(values)


def maximum(values, weights):
    return np.nanmax(values)


def mode(values, weights):
    # Area-weighted mode.
    # Reuse weights to do the counting: no allocations.
    # The alternative is defining a separate frequency array in which to add
    # the weights. This implementation is less efficient in terms of looping.
    # With many unique values, it keeps having to loop through a big part of
    # the weights array... but it would do so with a separate frequency array
    # as well. There are somewhat more elements to traverse in this case.
    s = values.size
    w_sum = 0
    for i in range(s):
        v = values[i]
        w = weights[i]
        if np.isnan(v):
            continue
        w_sum += 1
        for j in range(i):  # Compare with previously found values
            if values[j] == v:  # matches previous value
                weights[j] += w  # increase previous weight
                break

    if w_sum == 0:  # It skipped everything: only nodata values
        return np.nan
    else:  # Find the value with the highest frequency
        w_max = 0
        for i in range(s):
            w = weights[i]
            if w > w_max:
                w_max = w
                v = values[i]
        return v
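
# Illustrative check (not part of the original module): the value with the
# largest summed weight wins; note that the weights array is mutated in place
# to do the counting.
# >>> mode(np.array([1.0, 1.0, 2.0]), np.array([1.0, 1.0, 3.0]))
# 2.0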


def median(values, weights):
    return np.nanpercentile(values, 50)


def conductance(values, weights):
    v_agg = 0.0
    w_sum = 0.0
    for i in range(values.size):
        v = values[i]
        w = weights[i]
        if np.isnan(v):
            continue
        v_agg += v * w
        w_sum += w
    if w_sum == 0:
        return np.nan
    else:
        return v_agg


def max_overlap(values, weights):
    max_w = 0.0
    v = np.nan
    for i in range(values.size):
        w = weights[i]
        if w > max_w:
            max_w = w
            v = values[i]
    return v


METHODS = {
    "nearest": "nearest",
    "multilinear": "multilinear",
    "mean": mean,
    "harmonic_mean": harmonic_mean,
    "geometric_mean": geometric_mean,
    "sum": sum,
    "minimum": minimum,
    "maximum": maximum,
    "mode": mode,
    "median": median,
    "conductance": conductance,
    "max_overlap": max_overlap,
}