Coverage for C: \ Users \ peaco \ OneDrive \ Documents \ GitHub \ mt_metadata \ mt_metadata \ transfer_functions \ io \ emtfxml \ metadata \ site.py: 98%

83 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-10 00:11 -0800

1# ===================================================== 

2# Imports 

3# ===================================================== 

4from typing import Annotated 

5from xml.etree import cElementTree as et 

6 

7import numpy as np 

8import pandas as pd 

9from pydantic import Field, field_validator 

10 

11from mt_metadata.base import MetadataBase 

12from mt_metadata.common import BasicLocationNoDatum, Comment 

13from mt_metadata.common.mttime import MTime 

14from mt_metadata.transfer_functions.io.emtfxml.metadata import helpers 

15 

16from . import DataQualityNotes, DataQualityWarnings, Orientation 

17 

18 

19# ===================================================== 

class Site(MetadataBase):
    """
    Site-level metadata used by the EMTF XML transfer-function reader/writer.

    The model carries identifying information (project, survey, id, name),
    the acquisition time span (``start``/``end``), the list of runs, and
    nested metadata objects for data quality, orientation, location and
    comments.

    Notes
    -----
    * ``start`` and ``end`` are coerced to :class:`MTime` before validation.
    * ``year_collected`` is reduced to a year value before validation.
    * ``run_list`` accepts either a list of strings or a single string such
      as ``"[ mt001a, mt001b ]"``, ``"mt001a,mt001b"`` or ``"mt001a mt001b"``.
    * ``comments`` accepts a plain string, which is wrapped in ``Comment``.
    """

    project: Annotated[
        str,
        Field(
            default="",
            description="Name of the project",
            alias=None,
            pattern="^[a-zA-Z0-9-_]*$",
            json_schema_extra={
                "units": None,
                "required": True,
                "examples": ["USMTArray"],
            },
        ),
    ]

    survey: Annotated[
        str,
        Field(
            default="",
            description="Name of the survey",
            alias=None,
            json_schema_extra={
                "units": None,
                "required": True,
                "examples": ["MT 2020"],
            },
        ),
    ]

    year_collected: Annotated[
        MTime | str | float | int | np.datetime64 | pd.Timestamp,
        Field(
            default=None,
            description="Year data collected",
            alias=None,
            json_schema_extra={
                "units": None,
                "required": True,
                "examples": ["2020"],
            },
        ),
    ]

    country: Annotated[
        str,
        Field(
            default="",
            description="Country where data was collected",
            alias=None,
            json_schema_extra={
                "units": None,
                "required": False,
                "examples": ["USA"],
            },
        ),
    ]

    id: Annotated[
        str,
        Field(
            default="",
            description="Station ID name. This should be an alpha numeric name that is typically 5-6 characters long. Commonly the project name in 2 or 3 letters and the station number.",
            alias=None,
            pattern="^[a-zA-Z0-9]*$",
            json_schema_extra={
                "units": None,
                "required": True,
                "examples": ["MT001"],
            },
        ),
    ]

    name: Annotated[
        str,
        Field(
            default="",
            description="closest geographic name to the station",
            alias=None,
            json_schema_extra={
                "units": None,
                "required": True,
                "examples": ['"Whitehorse, YK"'],
            },
        ),
    ]

    acquired_by: Annotated[
        str,
        Field(
            default="",
            description="Person or group who collected the data",
            alias=None,
            json_schema_extra={
                "units": None,
                "required": True,
                "examples": ["MT Group"],
            },
        ),
    ]

    start: Annotated[
        MTime | str | float | int | np.datetime64 | pd.Timestamp,
        Field(
            default_factory=lambda: MTime(time_stamp=None),
            description="Date time when the data collection started",
            alias=None,
            json_schema_extra={
                "units": None,
                "required": True,
                "examples": ["2020-01-01T12:00:00"],
            },
        ),
    ]

    end: Annotated[
        MTime | str | float | int | np.datetime64 | pd.Timestamp,
        Field(
            default_factory=lambda: MTime(time_stamp=None),
            description="Date time when the data collection ended",
            alias=None,
            json_schema_extra={
                "units": None,
                "required": True,
                "examples": ["2020-05-01T12:00:00"],
            },
        ),
    ]

    run_list: Annotated[
        list[str] | None,
        Field(
            default_factory=list,
            description="list of runs recorded by the station. Should be a summary of all runs recorded",
            alias=None,
            json_schema_extra={
                "units": None,
                "required": True,
                "examples": ['"[ mt001a, mt001b, mt001c ]"'],
            },
        ),
    ]

    data_quality_notes: Annotated[
        DataQualityNotes,
        Field(
            default_factory=DataQualityNotes,  # type: ignore
            description="Notes on the data quality",
            alias=None,
            json_schema_extra={
                "units": None,
                "required": False,
                "examples": ["Data quality is good"],
            },
        ),
    ]

    data_quality_warnings: Annotated[
        DataQualityWarnings,
        Field(
            default_factory=DataQualityWarnings,  # type: ignore
            description="Warnings about the data quality",
            alias=None,
            json_schema_extra={
                "units": None,
                "required": False,
                "examples": ["Data quality is questionable"],
            },
        ),
    ]

    orientation: Annotated[
        Orientation,
        Field(
            default_factory=Orientation,  # type: ignore
            description="Orientation of the site",
            alias=None,
            json_schema_extra={
                "units": None,
                "required": False,
                "examples": [
                    "Orientation('layout=orthogonal, angle_to_geographic_north=0.0')"
                ],
            },
        ),
    ]

    location: Annotated[
        BasicLocationNoDatum,
        Field(
            default_factory=BasicLocationNoDatum,  # type: ignore
            description="Location of the site",
            alias=None,
            json_schema_extra={
                "units": None,
                "required": False,
                "examples": ["BasicLocation('latitude=60.0, longitude=-135.0')"],
            },
        ),
    ]

    comments: Annotated[
        Comment | str | None,
        Field(
            default_factory=Comment,  # type: ignore
            description="Comments about the site",
            alias=None,
            json_schema_extra={
                "units": None,
                "required": False,
                "examples": ["Comment('This is a comment about the site')"],
            },
        ),
    ]

    @field_validator("start", "end", mode="before")
    @classmethod
    def validate_start(
        cls, field_value: MTime | float | int | np.datetime64 | pd.Timestamp | str
    ):
        """
        Coerce ``start`` / ``end`` input to an :class:`MTime`.

        Despite its name this validator is registered for both ``start`` and
        ``end``. An existing ``MTime`` is passed through untouched; anything
        else is handed to ``MTime(time_stamp=...)``.
        """
        if isinstance(field_value, MTime):
            return field_value
        return MTime(time_stamp=field_value)

    @field_validator("year_collected", mode="before")
    @classmethod
    def validate_year_collected(
        cls, field_value: MTime | float | int | np.datetime64 | pd.Timestamp | str
    ):
        """
        Reduce ``year_collected`` input to a year.

        * A string containing exactly one hyphen (e.g. ``"2020-2021"`` or
          ``"2020-05"``) is truncated to the text before the hyphen.
        * An ``MTime`` contributes its ``year`` attribute.
        * An ``int`` is assumed to already be a year and returned unchanged.
        * Everything else is parsed with ``MTime(time_stamp=...)`` and the
          resulting ``year`` is returned.
        """
        if isinstance(field_value, str) and field_value.count("-") == 1:
            # Keep only the leading component of a two-part hyphenated string.
            field_value = field_value.split("-")[0]

        if isinstance(field_value, MTime):
            return field_value.year
        if isinstance(field_value, int):
            return field_value
        return MTime(time_stamp=field_value).year

    @field_validator("id", "project", "survey", "name", "acquired_by", mode="before")
    @classmethod
    def validate_string_fields(cls, field_value: str | None) -> str:
        """
        Normalise plain string fields.

        ``None`` becomes the empty string; every other value is converted
        with ``str()`` so downstream validation always sees a string.
        """
        return "" if field_value is None else str(field_value)

    @field_validator("comments", mode="before")
    @classmethod
    def validate_comments(cls, value: Comment | str | None) -> Comment | None:
        """
        Wrap a plain string in a ``Comment``.

        Non-string input (for example an existing ``Comment`` or ``None``) is
        returned unchanged and left to the field's own type validation.
        """
        if isinstance(value, str):
            return Comment(value=value)  # type: ignore[return-value]
        return value

    @field_validator("run_list", mode="before")
    @classmethod
    def validate_run_list(cls, value: str | list[str] | None) -> list[str] | None:
        """
        Normalise ``run_list`` input to a list of run identifiers.

        Parameters
        ----------
        value : str | list[str] | None
            * ``None`` is returned as ``None``.
            * A list whose items are all strings is returned unchanged.
            * A string is parsed: surrounding whitespace and enclosing
              square brackets are removed, then the text is split on commas
              if any are present, otherwise on whitespace. Each token is
              trimmed and empty tokens are discarded, so
              ``"[ mt001a, mt001b ]"`` yields ``["mt001a", "mt001b"]`` and
              ``""`` or ``"[ ]"`` yield ``[]``.

        Returns
        -------
        list[str] | None
            The parsed run identifiers.

        Raises
        ------
        ValueError
            If ``value`` is neither ``None``, a string, nor a list of strings.
        """
        if value is None:
            return None

        if isinstance(value, str):
            text = value.strip()
            if "[" in text and "]" in text:
                # Handle the string representation of a list.
                text = text.strip("[]")

            # Commas take precedence as the separator; without commas any
            # run of whitespace separates the items.
            tokens = text.split(",") if "," in text else text.split()
            return [token.strip() for token in tokens if token.strip()]

        if isinstance(value, list) and all(isinstance(item, str) for item in value):
            return value

        raise ValueError("run_list must be a list of strings.")

    def read_dict(self, input_dict: dict) -> None:
        """
        Populate this object from the ``"site"`` entry of ``input_dict``.

        For every key in ``input_dict["site"]`` the attribute of the same
        name is looked up on this object. If that attribute exposes its own
        ``read_dict`` it is given the whole site dictionary; otherwise the
        value is assigned through ``helpers._read_single``.

        Parameters
        ----------
        input_dict : dict
            Dictionary with a ``"site"`` key holding the site metadata.

        Raises
        ------
        KeyError
            If ``input_dict`` has no ``"site"`` entry.
        AttributeError
            If the site dictionary contains a key that is not an attribute
            of this object.
        """
        site_dict = input_dict["site"]
        for element in site_dict:
            attr = getattr(self, element)
            if hasattr(attr, "read_dict"):
                attr.read_dict(site_dict)
            else:
                helpers._read_single(self, site_dict, element)

    def to_xml(self, string: bool = False, required: bool = True) -> str | et.Element:
        """
        Serialise the site metadata to XML via ``helpers.to_xml``.

        Before serialising, ``end`` is overwritten with ``start`` if it is
        earlier than ``start``; note that this modifies the instance.

        Parameters
        ----------
        string : bool, optional
            Whether to return the XML as a string, by default False
        required : bool, optional
            Forwarded unchanged to ``helpers.to_xml``, by default True.
            NOTE(review): presumably limits the output to required
            fields -- confirm against the helper.

        Returns
        -------
        str | et.Element
            The XML representation of the object, with child elements
            emitted in the fixed order listed below.
        """
        # Guard against an inverted time span in the written file.
        if self.end < self.start:  # type: ignore
            self.end = self.start

        element_order = [
            "project",
            "survey",
            "year_collected",
            "country",
            "id",
            "name",
            "location",
            "orientation",
            "acquired_by",
            "start",
            "end",
            "run_list",
            "data_quality_notes",
            "data_quality_warnings",
        ]
        return helpers.to_xml(
            self,
            string=string,
            required=required,
            order=element_order,
        )