Coverage for C:\src\imod-python\imod\mf6\utilities\dataset.py: 85%

26 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-16 11:25 +0200

1from typing import Any, Optional 

2 

3import numpy as np 

4import pandas as pd 

5import xarray as xr 

6from xarray.core.utils import is_scalar 

7 

8from imod.typing import GridDataArray 

9 

10 

def remove_inactive(ds: xr.Dataset, active: xr.DataArray) -> xr.Dataset:
    """
    Drop list-based input entries that are located in inactive cells.

    Parameters
    ----------
    ds: xr.Dataset
        Dataset with list-based input. Needs a "cellid" variable and an
        "ncellid" dimension.
    active: xr.DataArray
        Grid with active cells. Its values are cast to bool after being
        looked up at every cellid.

    Returns
    -------
    xr.Dataset
        ``ds`` restricted along "ncellid" to the entries whose cell is active.

    Raises
    ------
    ValueError
        If ``ds`` has no "cellid" variable or no "ncellid" dimension.
    """
    if "cellid" not in ds.data_vars:
        raise ValueError("Missing variable 'cellid' in dataset")
    if "ncellid" not in ds.dims:
        raise ValueError("Missing dimension 'ncellid' in dataset")

    # The stored cellids are shifted down by one to obtain array indices.
    zero_based = ds["cellid"].values - 1

    # Unstack the columns by moving the last axis to the front:
    # https://stackoverflow.com/questions/64097426/is-there-unstack-in-numpy
    # The result has to be a tuple, since tuples get the special treatment
    # which we require for the indexing:
    # https://numpy.org/doc/stable/user/basics.indexing.html#dealing-with-variable-numbers-of-indices-within-programs
    index_per_axis = tuple(np.moveaxis(zero_based, -1, 0))

    is_active = active.values[index_per_axis].astype(bool)
    return ds.loc[{"ncellid": is_active}]

41 

42 

def is_dataarray_none(datarray: Any) -> bool:
    """
    Tell whether ``datarray`` is an xr.DataArray in which every value is null.

    Anything that is not an xr.DataArray yields False.
    """
    if not isinstance(datarray, xr.DataArray):
        return False
    return datarray.isnull().all().item()

45 

46 

def get_scalar_variables(ds: GridDataArray) -> list[str]:
    """
    Returns scalar variables in a dataset.

    Walks over ``ds.variables`` and collects the name of every variable for
    which ``is_scalar`` is true, in iteration order.
    """
    scalar_names = []
    for name, variable in ds.variables.items():
        if is_scalar(variable):
            scalar_names.append(name)
    return scalar_names

50 

51 

def assign_datetime_coords(
    da: GridDataArray,
    simulation_start_time: np.datetime64,
    time_unit: Optional[str] = "d",
) -> GridDataArray:
    """
    Replace the "time" coordinate by timestamps relative to a start time.

    Parameters
    ----------
    da: GridDataArray
        Object with a "time" coordinate. The coordinate values are converted
        to time deltas expressed in ``time_unit``.
    simulation_start_time: np.datetime64
        Moment to which the time deltas are added.
    time_unit: str, optional
        Unit passed on to ``pd.to_timedelta``. Default is "d".

    Returns
    -------
    GridDataArray
        Result of ``da.assign_coords`` with the new "time" values.

    Raises
    ------
    ValueError
        If ``da`` has no "time" coordinate.
    """
    has_time = "time" in da.coords
    if not has_time:
        raise ValueError(
            "cannot convert time column, because a time column could not be found"
        )

    start = pd.Timestamp(simulation_start_time)
    elapsed = pd.to_timedelta(da["time"], unit=time_unit)
    return da.assign_coords(time=start + elapsed)