Coverage for C: \ Users \ peaco \ OneDrive \ Documents \ GitHub \ mt_metadata \ mt_metadata \ common \ units.py: 95%

103 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-10 00:11 -0800

1""" 

2This is a placeholder module. See github issue #30 

3 

4In the mt_metadata package, the standard is that units are described by all 

5lower case strings. 

6 

7The dictionary UNITS is keyed by these lower 

8case strings. 

9 

10""" 

11 

12from collections import OrderedDict 

13from typing import Annotated 

14 

15# ============================================================================= 

16# Import 

17# ============================================================================= 

18import pandas as pd 

19from loguru import logger 

20from pydantic import AliasChoices, BaseModel, ConfigDict, Field 

21 

22 

23# ============================================================================= 

24 

# SI prefix name -> prefix symbol. The empty entry produces the unprefixed
# units when the prefixes are crossed with the unit tables further down.
prefixes = dict(
    [
        ("", ""),
        ("yotta", "Y"),
        ("zetta", "Z"),
        ("exa", "E"),
        ("peta", "P"),
        ("tera", "T"),
        ("giga", "G"),
        ("mega", "M"),
        ("kilo", "k"),
        ("hecto", "h"),
        ("deca", "da"),
        ("deci", "d"),
        ("centi", "c"),
        ("milli", "m"),
        ("micro", "μ"),
        ("nano", "n"),
        ("pico", "p"),
        ("femto", "f"),
        ("atto", "a"),
        ("zepto", "z"),
        ("yocto", "y"),
    ]
)

# Base units: lower-case name -> symbol / description / unicode_symbol.
# For every base unit the unicode symbol equals the plain symbol.
base_units = {
    unit: {"symbol": symbol, "description": description, "unicode_symbol": symbol}
    for unit, symbol, description in (
        ("meter", "m", "Unit of length"),
        ("kilogram", "kg", "Unit of mass"),
        ("second", "s", "Unit of time"),
        ("ampere", "A", "Unit of electric current"),
        ("kelvin", "K", "Unit of thermodynamic temperature"),
        ("mole", "mol", "Unit of amount of substance"),
        ("candela", "cd", "Unit of luminous intensity"),
        ("radian", "rad", "Unit of angle"),
    )
}

# Derived units, same layout as ``base_units``.
derived_units = {
    unit: {"symbol": symbol, "description": description, "unicode_symbol": symbol}
    for unit, symbol, description in (
        ("hertz", "Hz", "Unit of frequency"),
        ("newton", "N", "Unit of force"),
        ("joule", "J", "Unit of energy"),
        ("watt", "W", "Unit of power"),
        ("pascal", "Pa", "Unit of pressure"),
        ("coulomb", "C", "Unit of electric charge"),
        ("volt", "V", "Unit of electric potential"),
        ("ohm", "Ω", "Unit of electrical resistance"),
        ("siemens", "S", "Unit of electrical conductance"),
        ("weber", "Wb", "Unit of magnetic flux"),
        ("tesla", "T", "Unit of magnetic flux density"),
        ("henry", "H", "Unit of inductance"),
    )
}
# The ohm plot symbol is given as an explicit escape rather than copied
# from the plain symbol.
derived_units["ohm"]["unicode_symbol"] = "\u03a9"

# Special-case units that are not built from a prefix and an SI unit.
# They come first, so they win when a lookup takes the first matching row.
all_units = [
    {
        "name": name,
        "description": description,
        "symbol": symbol,
        "plot_label": plot_label,
    }
    for name, description, symbol, plot_label in (
        ("unknown", "unknown", "unknown", "Unknown"),
        ("digital counts", "digital counts from data logger", "count", "Digital Counts"),
        ("digital counts", "digital counts from data logger", "counts", "Digital Counts"),
        ("samples", "number of samples", "samples", "Samples"),
        ("celsius", "Unit of temperature", "C", "Celsius"),
    )
]

# Cross every prefix with every base and derived unit (base units first).
_prefixable_units = {**base_units, **derived_units}
for prefix_name, prefix_symbol in prefixes.items():
    for unit_name, unit_details in _prefixable_units.items():
        # Units whose symbol is all upper case get a capitalized name,
        # e.g. "milli" + "volt" -> "milliVolt".
        if unit_details["symbol"].isupper():
            unit_name = unit_name.capitalize()
        all_units.append(
            dict(
                name=prefix_name + unit_name,
                symbol=prefix_symbol + unit_details["symbol"],
                description=" ".join(
                    (prefix_name.capitalize(), unit_details["description"])
                ),
                plot_label=prefix_symbol + unit_details["unicode_symbol"],
            )
        )

# Lookup table with one row per unit: name, description, symbol, plot_label.
UNITS_DF = pd.DataFrame(all_units)

182 

183 

# Pydantic model describing one unit (or a combination of units) by its
# name, description, symbol and plot label.  Documented with comments
# rather than a class docstring so the generated model schema is unchanged.
class Unit(BaseModel):
    model_config = ConfigDict(
        validate_assignment=True,
        extra="allow",
        use_enum_values=True,
        coerce_numbers_to_str=True,
    )

    name: Annotated[str, Field(default=None, description="Common name of the unit.")]
    description: Annotated[
        str, Field(default=None, description="Description of the unit.")
    ]
    symbol: Annotated[
        str,
        Field(
            default=None,
            description="Symbol like representation of the unit",
            # "abbrviation" is the historical misspelling; it is kept so
            # existing input still validates, and the correctly spelled
            # "abbreviation" is now accepted as well.
            validation_alias=AliasChoices("symbol", "abbreviation", "abbrviation"),
        ),
    ]
    plot_label: Annotated[
        str, Field(default=None, description="Plot label of the unit.")
    ]

    def __str__(self):
        """Return the four unit fields, one ``key: value`` pair per line."""
        lines = [
            f"name: {self.name}",
            f"description: {self.description}",
            f"symbol: {self.symbol}",
            f"plot_label: {self.plot_label}",
        ]
        return "\n".join(lines)

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return self.__str__()

    def combine(self, other, separator="/"):
        """
        Combine this unit with another one into a new compound unit.

        Parameters
        ----------
        other : Unit
            The unit to append to this one.
        separator : str, optional
            How the two units are joined, by default "/".  The values
            "/", "per" and " per " produce a ratio (names and descriptions
            joined with " per ", symbols and plot labels with "/"); any
            other value joins all four fields with a single space.

        Returns
        -------
        Unit
            A new Unit whose name, description, symbol and plot_label are
            the joined fields of ``self`` and ``other``.

        Raises
        ------
        TypeError
            If ``other`` is not a Unit instance.
        """
        if not isinstance(other, Unit):
            raise TypeError("The other object must be an instance of the Unit class.")

        if separator in ["/", "per", " per "]:
            name_separator = " per "
            symbol_separator = "/"
        else:
            name_separator = " "
            symbol_separator = " "

        return Unit(
            name=f"{self.name}{name_separator}{other.name}",
            description=f"{self.description}{name_separator}{other.description}",
            symbol=f"{self.symbol}{symbol_separator}{other.symbol}",
            plot_label=f"{self.plot_label}{symbol_separator}{other.plot_label}",
        )

    def to_dict(self):
        """Return the name, description, symbol and plot_label as a plain dict."""
        return {
            "name": self.name,
            "description": self.description,
            "symbol": self.symbol,
            "plot_label": self.plot_label,
        }

    def from_dict(self, value):
        """
        Update this unit in place from a mapping.

        Parameters
        ----------
        value : dict
            Attribute names mapped to the values to assign.  Every key is
            assigned with ``setattr``, so keys other than the four declared
            fields are assigned too.
        """
        for k, v in value.items():
            setattr(self, k, v)

265 

266 

267def find_separator(unit_string: str) -> str | None: 

268 """ 

269 Find the first separator in a unit string. 

270 

271 Parameters 

272 ---------- 

273 unit_string : str 

274 The unit string to search for separators. 

275 

276 Returns 

277 ------- 

278 str 

279 The first separator found in the unit string. 

280 """ 

281 

282 find_dict = {} 

283 for sep in ["/", " per ", " "]: 

284 find_dict[sep] = unit_string.find(sep) 

285 # Sort the dictionary by the index of the separator in the unit string 

286 # and return the first separator found 

287 find_dict = OrderedDict(sorted(find_dict.items(), key=lambda item: item[1])) 

288 for sep in find_dict.keys(): 

289 if find_dict[sep] > -1: 

290 return sep 

291 return 

292 

293 

def parse_unit_string(unit_string: str) -> list[dict] | None:
    """
    Split a compound unit string into unit names and their separators.

    Square brackets are removed before parsing, so grouping is not
    preserved.  The string is consumed left to right, splitting on whichever
    separator ``find_separator`` reports first.

    Parameters
    ----------
    unit_string : str
        The unit string to parse (e.g., "mV nT/[km ohm]").

    Returns
    -------
    list[dict]
        One ``{"name": ..., "sep": ...}`` entry per unit, in order.  When
        there are several entries, ``"sep"`` is the separator that precedes
        the unit, and the first entry gets ``""``.  Example for
        "mV nT/[km ohm]":
        [{"name": "mV", "sep": ""}, {"name": "nT", "sep": " "},
        {"name": "km", "sep": "/"}, {"name": "ohm", "sep": " "}]
        A single-entry result is returned as parsed: its ``"sep"`` is None,
        or the trailing separator if the string ended with one.

    Raises
    ------
    TypeError
        If ``unit_string`` is not a string.
    ValueError
        If no unit name is left after removing brackets and separators.
    """
    if not isinstance(unit_string, str):
        raise TypeError("The unit_string must be a string.")

    remaining = unit_string.replace("[", "").replace("]", "")
    parsed = []
    while True:
        separator = find_separator(remaining)
        # With separator None, split() breaks on any run of whitespace
        # (tabs, newlines) and returns [] for an empty string.
        parts = remaining.split(separator, 1)
        if not parts:
            break

        has_more = len(parts) == 2
        if separator is None and has_more:
            # Split on whitespace other than a plain space.  Previously the
            # text after it was silently dropped; treat it as a space.
            separator = " "

        name = parts[0].strip()
        if name:
            parsed.append({"name": name, "sep": separator})

        if not has_more:
            break
        remaining = parts[1].strip()

    if not parsed:
        raise ValueError("No unit found in the unit string.")
    if len(parsed) == 1:
        return parsed

    # Parsing records the separator that FOLLOWS each unit.  Shift by one so
    # each entry carries the separator that PRECEDES it; the first gets "".
    preceding = [""] + [entry["sep"] for entry in parsed[:-1]]
    return [
        {"name": entry["name"], "sep": sep} for entry, sep in zip(parsed, preceding)
    ]

338 

339 

def get_unit_object(unit: str | None, allow_none=True) -> Unit:
    """
    Build a Unit object from a unit name, symbol or compound unit string.

    The string is split with ``parse_unit_string``; every part is looked up
    with ``get_unit_from_df`` and the parts are combined left to right with
    ``Unit.combine`` using the separator found between them.

    Parameters
    ----------
    unit : str or None
        Name or symbol of the unit, or a compound such as "mV/km".  None
        and "" yield the "unknown" unit.
    allow_none : bool, optional
        Passed to ``get_unit_from_df``.  If True (the default), a part that
        is not found becomes the "unknown" unit; if False a KeyError is
        raised instead.

    Returns
    -------
    Unit
        Unit object with the unit name, symbol, description and plot_label.

    Raises
    ------
    KeyError
        If a part is not found in the DataFrame and ``allow_none`` is False.
    TypeError
        If ``unit`` is neither a string nor None (raised by
        ``parse_unit_string``).
    ValueError
        If the string contains no unit name.
    """
    # "digital counts" contains a space and would otherwise be parsed as two
    # separate units, so its spellings are mapped to that entry directly.
    if isinstance(unit, str) and unit.lower() in (
        "digital counts",
        "counts",
        "digital count",
    ):
        return get_unit_from_df("digital counts", allow_none=allow_none)

    if unit in [None, ""]:
        return Unit(
            name="unknown",
            description="unknown",
            symbol="unknown",
            plot_label="Unknown",
        )

    units_parts = parse_unit_string(unit)
    if not units_parts:
        # Defensive only: parse_unit_string raises for an empty result.
        raise ValueError("No unit found in the unit string.")

    combined = get_unit_from_df(units_parts[0]["name"], allow_none=allow_none)
    for entry in units_parts[1:]:
        combined = combined.combine(
            get_unit_from_df(entry["name"], allow_none=allow_none),
            separator=entry["sep"],
        )
    return combined

394 

395 

def get_unit_from_df(value: str, allow_none=True) -> Unit:
    """
    Look up a unit in UNITS_DF by name or symbol and return it as a Unit.

    Parameters
    ----------
    value : str
        The name or symbol of the unit to search for.
    allow_none : bool, optional
        What to do when nothing matches.  If True (the default), log a
        warning and return the "unknown" unit; if False, raise KeyError.

    Returns
    -------
    Unit
        A Unit built from the first matching row of UNITS_DF, or the
        "unknown" unit when nothing matches and ``allow_none`` is True.

    Raises
    ------
    KeyError
        If the unit is not found in the DataFrame and ``allow_none`` is False.
    """
    # Names match case-insensitively.  Symbols match case-sensitively so
    # prefixes stay distinct (e.g. 'mV' is milliVolt, 'MV' is megaVolt).
    name_hit = UNITS_DF["name"].str.lower() == value.lower()
    symbol_hit = UNITS_DF["symbol"] == value
    matches = UNITS_DF[name_hit | symbol_hit]

    # Only single-character input falls back to a case-insensitive symbol
    # match (e.g. 'M' -> meter); longer input such as 'ft' must not be
    # turned into 'fT'.
    if matches.empty and len(value) == 1:
        matches = UNITS_DF[UNITS_DF["symbol"].str.lower() == value.lower()]

    if not matches.empty:
        # Several rows can match; the first one in table order wins.
        first_row = matches.iloc[0]
        return Unit(**first_row.to_dict())

    if not allow_none:
        raise KeyError(
            f"Unit '{value}' not found in the UNITS_DF DataFrame. "
            "If the units are real an need to be added raise an issue to add the unit."
        )

    logger.warning(
        f"Unit '{value}' not found in accepted units, setting to 'unknown'. "
        "If this is an error raise an issue to add a unit. If an error needs "
        "to be raised, set allow_none=False."
    )
    return Unit(
        name="unknown",
        description="unknown",
        symbol="unknown",
        plot_label="Unknown",
    )