Coverage for C:\src\imod-python\imod\evaluate\head.py: 91%

89 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-08 14:15 +0200

1import warnings 

2 

3import numpy as np 

4import pandas as pd 

5import xarray as xr 

6 

7 

def convert_pointwaterhead_freshwaterhead(
    pointwaterhead, density, elevation, density_fresh=1000.0
):
    r"""Convert point water head (as output by SEAWAT) into freshwater head.

    Applies Eq.3 from Guo, W., & Langevin, C. D. (2002):

    .. math:: h_{f}=\frac{\rho}{\rho_{f}}h-\frac{\rho-\rho_{f}}{\rho_{f}}Z

    An edge case arises when the head lies below the cell centre, or entirely
    below the cell. Strictly applying Eq.3 there would yield freshwater heads
    lower than the original point water head, which is physically impossible.
    In that case this function returns the freshwater head of the uppermost
    underlying cell where the original point water head exceeds the cell
    centre.

    Parameters
    ----------
    pointwaterhead : float or xr.DataArray of floats
        the point water head as outputted by SEAWAT, in m.
    density : float or xr.DataArray of floats
        the water density at the same locations as `pointwaterhead`.
    elevation : float or xr.DataArray of floats
        elevation at the same locations as `pointwaterhead`, in m.
    density_fresh : float, optional
        the density of freshwater (1000 kg/m3), or a different value if
        different units are used, or a different density reference is required.

    Returns
    -------
    freshwaterhead : float or xr.DataArray of floats
    """
    # Eq.3: scale head by the density ratio, then subtract the
    # elevation-dependent correction term.
    freshwaterhead = (density / density_fresh) * pointwaterhead - (
        (density - density_fresh) / density_fresh
    ) * elevation

    # Edge case: point water head below z. Mask those cells and backfill along
    # the layer dimension, so each masked cell takes the freshwater head of
    # the topmost underlying cell where elevation < pointwaterhead.
    # Only applicable to layered xr.DataArrays.
    is_layered = (
        isinstance(pointwaterhead, xr.DataArray) and "layer" in pointwaterhead.dims
    )
    if is_layered:
        above_cell_centre = pointwaterhead > elevation
        freshwaterhead = freshwaterhead.where(above_cell_centre).bfill(dim="layer")

    return freshwaterhead

52 

53 

def _calculate_gxg(
    head_bimonthly: xr.DataArray, below_surfacelevel: bool = False
) -> xr.Dataset:
    """
    Compute GVG, GLG, and GHG from bimonthly head data.

    Parameters
    ----------
    head_bimonthly : xr.DataArray
        Head values sampled on the 14th and 28th of every month (24 values per
        year), with a ``time`` dimension whose size is a multiple of 24.
    below_surfacelevel : bool, optional
        If True, heads are interpreted as depths below surface level, which
        inverts the meaning of "highest" and "lowest" (see below).

    Returns
    -------
    xr.Dataset
        With variables ``gvg``, ``glg``, ``ghg``, ``n_years_gvg``,
        ``n_years_gxg``.
    """
    # Imported lazily: bottleneck is only needed for this computation.
    import bottleneck as bn

    # Most efficient way of finding the three highest and three lowest is via a
    # partition. See:
    # https://bottleneck.readthedocs.io/en/latest/reference.html#bottleneck.partition

    def lowest3_mean(da: xr.DataArray):
        # partition with kth=2 places the three smallest values in a[..., :3]
        # (in unspecified order), which is all we need for their mean.
        a = bn.partition(da.values, kth=2, axis=-1)

        with warnings.catch_warnings():
            # nanmean warns on all-NaN slices; those yield NaN, which is the
            # desired result for years filtered out above.
            warnings.simplefilter("ignore", category=RuntimeWarning)
            result = np.nanmean(a[..., :3], axis=-1)

        # Reuse a single-bimonth slice as a template to preserve coords/dims.
        template = da.isel(bimonth=0)
        return template.copy(data=result)

    def highest3_mean(da: xr.DataArray):
        # Negate so that partitioning for the smallest finds the three
        # largest; negate again after slicing to recover the original sign.
        a = bn.partition(-da.values, kth=2, axis=-1)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            result = np.nanmean(-a[..., :3], axis=-1)

        template = da.isel(bimonth=0)
        return template.copy(data=result)

    # 24 bimonthly samples per year; partial years are not supported.
    timesize = head_bimonthly["time"].size
    if timesize % 24 != 0:
        raise ValueError("head is not bimonthly for a full set of years")
    n_year = int(timesize / 24)

    # GVG: average spring head, from 14/28 March and 14 April.
    # First and second date of March: 4, 5; first date of April: 6.
    month_index = np.array([4, 5, 6])
    # Repeat this for every year in dataset, and increment by 24 per repetition.
    yearly_increments = (np.arange(n_year) * 24)[:, np.newaxis]
    # Broadcast to a full set
    gvg_index = xr.DataArray(
        data=(month_index + yearly_increments), dims=("hydroyear", "bimonth")
    )
    gvg_data = head_bimonthly.isel(time=gvg_index)
    # Filters years without 3 available measurements.
    gvg_years = gvg_data.count("bimonth") == 3
    gvg_data = gvg_data.where(gvg_years)

    # Hydrological years: running from 1 April to 1 April in the Netherlands.
    # Increment run from April (6th date) to April (30th date) for every year.
    # The last (incomplete) hydrological year is dropped via [:-1].
    # Broadcast to a full set
    newdims = ("hydroyear", "bimonth")
    gxg_index = xr.DataArray(
        data=(np.arange(6, 30) + yearly_increments[:-1]),
        dims=newdims,
    )
    gxg_data = head_bimonthly.isel(time=gxg_index)
    # Move the new (hydroyear, bimonth) dims to the end so bimonth is the
    # last axis, as required by the axis=-1 partition in the helpers above.
    dims = [dim for dim in gxg_data.dims if dim not in newdims]
    dims.extend(newdims)
    gxg_data = gxg_data.transpose(*dims)

    # Filter years without 24 measurements.
    gxg_years = gxg_data.count("bimonth") == 24
    gxg_data = gxg_data.where(gxg_years)

    # First compute LG3 and HG3 per hydrological year, then compute the mean over the total.
    if gxg_data.chunks is not None:
        # If data is lazily loaded/chunked, process data of one year at a time.
        gxg_data = gxg_data.chunk({"hydroyear": 1})
        lg3 = xr.map_blocks(lowest3_mean, gxg_data, template=gxg_data.isel(bimonth=0))
        hg3 = xr.map_blocks(highest3_mean, gxg_data, template=gxg_data.isel(bimonth=0))
    else:
        # Otherwise, just compute it in a single go.
        lg3 = lowest3_mean(gxg_data)
        hg3 = highest3_mean(gxg_data)

    gxg = xr.Dataset()
    gxg["gvg"] = gvg_data.mean(("hydroyear", "bimonth"))

    ghg = hg3.mean("hydroyear")
    glg = lg3.mean("hydroyear")
    # When heads are expressed as depth below surface, a numerically high
    # value is a LOW groundwater level, so glg and ghg swap roles.
    if below_surfacelevel:
        gxg["glg"] = ghg
        gxg["ghg"] = glg
    else:
        gxg["glg"] = glg
        gxg["ghg"] = ghg

    # Add the numbers of years used in the calculation
    gxg["n_years_gvg"] = gvg_years.sum("hydroyear")
    gxg["n_years_gxg"] = gxg_years.sum("hydroyear")
    return gxg

145 

146 

def calculate_gxg_points(
    df: pd.DataFrame,
    id: str = "id",
    time: str = "time",
    head: str = "head",
    below_surfacelevel: bool = False,
    tolerance: pd.Timedelta = pd.Timedelta(days=7),
) -> pd.DataFrame:
    """
    Calculate GxG groundwater characteristics from head time series.

    GLG and GHG (average lowest and average highest groundwater level
    respectively) are computed as the average of the three lowest (GLG) or
    highest (GHG) head values per Dutch hydrological year (april - april), for
    head values measured at a semi-monthly frequency (14th and 28th of every
    month). GVG (average spring groundwater level) is the average of the
    groundwater level on the 14th and 28th of March and the 14th of April.
    Supplied head values are resampled (nearest) to the 14/28 frequency.

    Hydrological years without all 24 14/28 dates present are discarded for
    glg and ghg. Years without the 3 dates for gvg are discarded.

    Parameters
    ----------
    df: pd.DataFrame
        Dataframe containing the piezometer IDs, dates of measurement, and
        measured heads.
    id: str
        Column name of piezometer ID.
    time: str
        Column name of datetime.
    head: str
        Column name of head measurement.
    below_surfacelevel: bool
        False (default) if heads are relative to a datum (e.g. sea level). If
        True, heads are taken as m below surface level.
    tolerance: pd.Timedelta, default: 7 days.
        Maximum time window allowed when searching for dates around the 14th
        and 28th of every month.

    Returns
    -------
    gxg : pd.DataFrame
        Dataframe containing ``glg``: average lowest head, ``ghg``: average
        highest head, ``gvg``: average spring head, ``n_years_gvg``: numbers of
        years used for gvg, ``n_years_gxg``: numbers of years used for glg and
        ghg.

    Examples
    --------

    Read some IPF data and compute the GxG values, while specifying the
    (non-standard) column names:

    >>> import imod
    >>> df = imod.ipf.read("piezometers.ipf")
    >>> gxg = imod.evaluate.calculate_gxg_points(
    >>>     df=df,
    >>>     id="ID-column",
    >>>     time="Date",
    >>>     head="Piezometer head (m)",
    >>> )
    """

    def bimonthly(series: pd.Series, dates: pd.DatetimeIndex, tolerance: pd.Timedelta):
        # Duplicate timestamps would make reindex ambiguous; keep the first.
        deduplicated = series[~series.index.duplicated(keep="first")]
        return deduplicated.reindex(dates, method="nearest", tolerance=tolerance)

    # Validate input up front so errors surface before any reshaping work.
    if not isinstance(df, pd.DataFrame):
        raise TypeError(f"df must be a DataFrame, received: {type(df).__name__}")
    for name in (id, time, head):
        if name not in df:
            raise ValueError(f"Column {name} not present in dataframe")

    # Create a bi-monthly date range: semi-month starts (1st and 15th),
    # shifted by 13 days to land on the 14th and 28th.
    first_year = df[time].min().year
    last_year = df[time].max().year
    semimonthly = pd.date_range(
        start=f"{first_year}-01-01", end=f"{last_year}-12-31", freq="SMS", name="time"
    )
    dates = semimonthly + pd.DateOffset(days=13)

    # Convert every location's time series to the same date range. This forms
    # a rectangular array which we can represent directly as a DataArray.
    grouped_heads = df.set_index(time).groupby(id)[head]
    bimonthly_series = grouped_heads.apply(bimonthly, dates, tolerance)
    head_bimonthly = bimonthly_series.to_xarray()

    # Calculate GxG values per location, then transform back to a DataFrame.
    gxg = _calculate_gxg(head_bimonthly, below_surfacelevel)
    return gxg.to_dataframe()

238 

239 

def calculate_gxg(
    head: xr.DataArray,
    below_surfacelevel: bool = False,
    tolerance: pd.Timedelta = pd.Timedelta(days=7),
) -> xr.Dataset:
    """
    Calculate GxG groundwater characteristics from head time series.

    GLG and GHG (average lowest and average highest groundwater level respectively) are
    calculated as the average of the three lowest (GLG) or highest (GHG) head values per
    Dutch hydrological year (april - april), for head values measured at a semi-monthly frequency
    (14th and 28th of every month). GVG (average spring groundwater level) is calculated as
    the average of groundwater level on 14th and 28th of March, and 14th of April. Supplied head
    values are resampled (nearest) to the 14/28 frequency.

    Hydrological years without all 24 14/28 dates present are discarded for glg and ghg.
    Years without the 3 dates for gvg are discarded.

    Parameters
    ----------
    head : xr.DataArray of floats
        Head relative to sea level, in m, or m below surface level if `below_surfacelevel` is
        set to True. Must be of dimensions ``("time", "y", "x")``.
    below_surfacelevel : boolean, optional, default: False.
        False (default) if heads are relative to a datum (e.g. sea level). If
        True, heads are taken as m below surface level.
    tolerance: pd.Timedelta, default: 7 days.
        Maximum time window allowed when searching for dates around the 14th
        and 28th of every month.

    Returns
    -------
    gxg : xr.Dataset
        Dataset containing ``glg``: average lowest head, ``ghg``: average
        highest head, ``gvg``: average spring head, ``n_years_gvg``: numbers of
        years used for gvg, ``n_years_gxg``: numbers of years used for glg and
        ghg.

    Examples
    --------
    Load the heads, and calculate groundwater characteristics after the year 2000:

    >>> import imod
    >>> heads = imod.idf.open("head*.idf")
    >>> heads = heads.sel(time=heads.time.dt.year >= 2000, layer=1)
    >>> gxg = imod.evaluate.calculate_gxg(heads)

    Transform to meters below surface level by subtracting from surface level:

    >>> surflevel = imod.idf.open("surfacelevel.idf")
    >>> gxg = surflevel - gxg

    Or calculate from groundwater level relative to surface level directly:

    >>> gwl = surflevel - heads
    >>> gxg = imod.evaluate.calculate_gxg(gwl, below_surfacelevel=True)

    """
    # Validate shape and time dtype before doing any work.
    if head.dims != ("time", "y", "x"):
        raise ValueError('Dimensions must be ("time", "y", "x")')
    if not np.issubdtype(head["time"].dtype, np.datetime64):
        raise ValueError("Time must have dtype numpy datetime64")

    # Reindex to GxG frequency date_range: every 14th and 28th of the month.
    # "SMS" gives semi-month starts (1st and 15th); offsetting by 13 days
    # lands on the 14th and 28th.
    start = f"{int(head['time'][0].dt.year)}-01-01"
    end = f"{int(head['time'][-1].dt.year)}-12-31"
    dates = pd.date_range(start=start, end=end, freq="SMS") + pd.DateOffset(days=13)
    head_bimonthly = head.reindex(time=dates, method="nearest", tolerance=tolerance)

    gxg = _calculate_gxg(head_bimonthly, below_surfacelevel)
    return gxg