Coverage for C: \ Users \ peaco \ OneDrive \ Documents \ GitHub \ mt_metadata \ mt_metadata \ common \ units.py: 95%
103 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-10 00:11 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-10 00:11 -0800
1"""
This is a placeholder module. See github issue #30

In the mt_metadata package, the standard is that units are described by all
lower case strings.

The dictionary UNITS is keyed by these lower
case strings.
10"""
12from collections import OrderedDict
13from typing import Annotated
15# =============================================================================
16# Import
17# =============================================================================
18import pandas as pd
19from loguru import logger
20from pydantic import AliasChoices, BaseModel, ConfigDict, Field
23# =============================================================================
# Define SI prefixes
# Maps the spelled-out SI prefix to its symbol.  The empty-string entry is the
# "no prefix" case, so that iterating over this mapping also yields every unit
# in its unprefixed form.
prefixes = {
    "": "",
    "yotta": "Y",
    "zetta": "Z",
    "exa": "E",
    "peta": "P",
    "tera": "T",
    "giga": "G",
    "mega": "M",
    "kilo": "k",
    "hecto": "h",
    "deca": "da",
    "deci": "d",
    "centi": "c",
    "milli": "m",
    "micro": "μ",
    "nano": "n",
    "pico": "p",
    "femto": "f",
    "atto": "a",
    "zepto": "z",
    "yocto": "y",
}
# Define base units
# Each entry is keyed by the lower-case unit name and carries the plain
# ``symbol``, a human readable ``description`` and the ``unicode_symbol``
# that is used when building plot labels.
base_units = {
    "meter": {
        "symbol": "m",
        "description": "Unit of length",
        "unicode_symbol": "m",
    },
    "kilogram": {
        "symbol": "kg",
        "description": "Unit of mass",
        "unicode_symbol": "kg",
    },
    "second": {
        "symbol": "s",
        "description": "Unit of time",
        "unicode_symbol": "s",
    },
    "ampere": {
        "symbol": "A",
        "description": "Unit of electric current",
        "unicode_symbol": "A",
    },
    "kelvin": {
        "symbol": "K",
        "description": "Unit of thermodynamic temperature",
        "unicode_symbol": "K",
    },
    "mole": {
        "symbol": "mol",
        "description": "Unit of amount of substance",
        "unicode_symbol": "mol",
    },
    "candela": {
        "symbol": "cd",
        "description": "Unit of luminous intensity",
        "unicode_symbol": "cd",
    },
    "radian": {
        "symbol": "rad",
        "description": "Unit of angle",
        "unicode_symbol": "rad",
    },
}
# Define derived units
# Same layout as ``base_units``: lower-case unit name -> ``symbol``,
# ``description`` and ``unicode_symbol``.
derived_units = {
    "hertz": {
        "symbol": "Hz",
        "description": "Unit of frequency",
        "unicode_symbol": "Hz",
    },
    "newton": {
        "symbol": "N",
        "description": "Unit of force",
        "unicode_symbol": "N",
    },
    "joule": {
        "symbol": "J",
        "description": "Unit of energy",
        "unicode_symbol": "J",
    },
    "watt": {
        "symbol": "W",
        "description": "Unit of power",
        "unicode_symbol": "W",
    },
    "pascal": {
        "symbol": "Pa",
        "description": "Unit of pressure",
        "unicode_symbol": "Pa",
    },
    "coulomb": {
        "symbol": "C",
        "description": "Unit of electric charge",
        "unicode_symbol": "C",
    },
    "volt": {
        "symbol": "V",
        "description": "Unit of electric potential",
        "unicode_symbol": "V",
    },
    "ohm": {
        "symbol": "Ω",
        "description": "Unit of electrical resistance",
        "unicode_symbol": "\u03a9",
    },
    "siemens": {
        "symbol": "S",
        "description": "Unit of electrical conductance",
        "unicode_symbol": "S",
    },
    "weber": {
        "symbol": "Wb",
        "description": "Unit of magnetic flux",
        "unicode_symbol": "Wb",
    },
    "tesla": {
        "symbol": "T",
        "description": "Unit of magnetic flux density",
        "unicode_symbol": "T",
    },
    "henry": {
        "symbol": "H",
        "description": "Unit of inductance",
        "unicode_symbol": "H",
    },
}
# Combine prefixes with base and derived units
# The list is seeded with the entries that are not produced by the
# prefix/unit combination below.  "digital counts" is listed twice on purpose
# so that both the "count" and the "counts" symbol resolve to it.
all_units = [
    {
        "name": "unknown",
        "description": "unknown",
        "symbol": "unknown",
        "plot_label": "Unknown",
    },
    {
        "name": "digital counts",
        "description": "digital counts from data logger",
        "symbol": "count",
        "plot_label": "Digital Counts",
    },
    {
        "name": "digital counts",
        "description": "digital counts from data logger",
        "symbol": "counts",
        "plot_label": "Digital Counts",
    },
    {
        "name": "samples",
        "description": "number of samples",
        "symbol": "samples",
        "plot_label": "Samples",
    },
    {
        "name": "celsius",
        "description": "Unit of temperature",
        "symbol": "C",
        "plot_label": "Celsius",
    },
]
for prefix_name, prefix_symbol in prefixes.items():
    for unit_name, unit_details in {**base_units, **derived_units}.items():
        # Units whose symbol is entirely upper case get a capitalized name,
        # so the prefixed name reads e.g. "milliVolt".
        if unit_details["symbol"].isupper():
            unit_name = unit_name.capitalize()
        all_units.append(
            {
                "name": f"{prefix_name}{unit_name}",
                "symbol": f"{prefix_symbol}{unit_details['symbol']}",
                # NOTE(review): for the empty prefix this yields a description
                # with a leading space (" Unit of ..."); left unchanged because
                # it is part of the published catalogue data.
                "description": f"{prefix_name.capitalize()} {unit_details['description']}",
                "plot_label": f"{prefix_symbol}{unit_details['unicode_symbol']}",
            }
        )

# Convert to a pandas DataFrame
# One row per unit with the columns name, description, symbol and plot_label.
UNITS_DF = pd.DataFrame(all_units)
class Unit(BaseModel):
    """
    Description of a single unit: its name, description, symbol and plot label.

    The model validates on assignment, allows extra attributes and coerces
    numbers to strings (see ``model_config``).
    """

    model_config = ConfigDict(
        validate_assignment=True,
        extra="allow",
        use_enum_values=True,
        coerce_numbers_to_str=True,
    )

    # NOTE(review): every field is annotated ``str`` but defaults to ``None``;
    # confirm whether ``str | None`` is the intended type.
    name: Annotated[str, Field(default=None, description="Common name of the unit.")]
    description: Annotated[
        str, Field(default=None, description="Description of the unit.")
    ]
    symbol: Annotated[
        str,
        Field(
            default=None,
            description="Symbol like representation of the unit",
            # "abbrviation" is the original (misspelled) alias and is kept so
            # existing inputs keep validating; "abbreviation" is the intended
            # spelling.
            validation_alias=AliasChoices("symbol", "abbreviation", "abbrviation"),
        ),
    ]
    plot_label: Annotated[
        str, Field(default=None, description="Plot label of the unit.")
    ]

    def __str__(self):
        """Return the four fields as ``key: value`` lines."""
        lines = [
            f"name: {self.name}",
            f"description: {self.description}",
            f"symbol: {self.symbol}",
            f"plot_label: {self.plot_label}",
        ]
        return "\n".join(lines)

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return self.__str__()

    def combine(self, other, separator="/"):
        """
        Combine two unit objects into a single compound unit.

        Parameters
        ----------
        other : Unit
            The other unit object to combine with.
        separator : str, optional
            The separator to use between the two units, by default "/".
            "/", "per" and " per " produce a quotient (names joined with
            " per ", symbols with "/"); anything else joins every field
            with a single space.

        Returns
        -------
        Unit
            New unit whose name, description, symbol and plot_label are the
            corresponding fields of ``self`` and ``other`` joined together.

        Raises
        ------
        TypeError
            If ``other`` is not a Unit.
        """
        if not isinstance(other, Unit):
            raise TypeError("The other object must be an instance of the Unit class.")

        if separator in ["/", "per", " per "]:
            name_separator = " per "
            symbol_separator = "/"
        else:
            name_separator = " "
            symbol_separator = " "

        combined_unit = Unit(
            name=f"{self.name}{name_separator}{other.name}",
            description=f"{self.description}{name_separator}{other.description}",
            symbol=f"{self.symbol}{symbol_separator}{other.symbol}",
            plot_label=f"{self.plot_label}{symbol_separator}{other.plot_label}",
        )
        return combined_unit

    def to_dict(self):
        """Return name, description, symbol and plot_label as a plain dict."""
        return {
            "name": self.name,
            "description": self.description,
            "symbol": self.symbol,
            "plot_label": self.plot_label,
        }

    def from_dict(self, value):
        """
        Set attributes on this instance from a mapping.

        Parameters
        ----------
        value : dict
            Attribute name -> value pairs; each pair is assigned with
            ``setattr``, so the model's assignment validation applies.
        """
        for k, v in value.items():
            setattr(self, k, v)
267def find_separator(unit_string: str) -> str | None:
268 """
269 Find the first separator in a unit string.
271 Parameters
272 ----------
273 unit_string : str
274 The unit string to search for separators.
276 Returns
277 -------
278 str
279 The first separator found in the unit string.
280 """
282 find_dict = {}
283 for sep in ["/", " per ", " "]:
284 find_dict[sep] = unit_string.find(sep)
285 # Sort the dictionary by the index of the separator in the unit string
286 # and return the first separator found
287 find_dict = OrderedDict(sorted(find_dict.items(), key=lambda item: item[1]))
288 for sep in find_dict.keys():
289 if find_dict[sep] > -1:
290 return sep
291 return
def parse_unit_string(unit_string: str) -> list[dict]:
    """
    Parse a unit string into a list of unit names and separators.

    Square brackets are removed before parsing, so grouping is not preserved.
    Runs of whitespace are collapsed and whitespace around "/" is dropped, so
    "mV / km" is parsed the same way as "mV/km".

    Parameters
    ----------
    unit_string : str
        The unit string to parse (e.g., "mV nT/[km ohm]").

    Returns
    -------
    list[dict]
        A list of dictionaries, each containing a unit name and a separator.
        When more than one unit is found, "sep" is the separator that
        precedes the unit ("" for the first unit).
        Example: "mV nT/[km ohm]" gives
        [{"name": "mV", "sep": ""}, {"name": "nT", "sep": " "},
        {"name": "km", "sep": "/"}, {"name": "ohm", "sep": " "}]
        When exactly one unit is found, the single entry is returned with
        "sep" set to whatever followed it (None if nothing did).

    Raises
    ------
    TypeError
        If ``unit_string`` is not a string.
    ValueError
        If no unit name is found in the string.
    """
    if not isinstance(unit_string, str):
        raise TypeError("The unit_string must be a string.")

    remaining = unit_string.replace("[", "").replace("]", "")
    # Normalize whitespace first.  Otherwise the space in front of a "/" is
    # picked up as the separator and the "/" itself is silently discarded.
    remaining = " ".join(remaining.split())
    remaining = "/".join(piece.strip() for piece in remaining.split("/"))

    # First pass: each entry records the separator that FOLLOWS the unit.
    result = []
    while True:
        separator = find_separator(remaining)
        if separator is None:
            # No separator left: whatever remains is the last unit.
            if remaining:
                result.append({"name": remaining, "sep": None})
            break
        head, remaining = remaining.split(separator, 1)
        head = head.strip()
        if head:
            result.append({"name": head, "sep": separator})
        remaining = remaining.strip()

    if len(result) == 0:
        raise ValueError("No unit found in the unit string.")
    if len(result) == 1:
        return result

    # change order of separators: shift them by one so that each entry
    # carries the separator that PRECEDES it.
    new_result = [{"name": result[0]["name"], "sep": ""}]
    new_result.extend(
        {"name": current["name"], "sep": previous["sep"]}
        for previous, current in zip(result, result[1:])
    )
    return new_result
def get_unit_object(unit: str, allow_none=True) -> Unit:
    """
    From the unit name or symbol return a Unit object.

    The string is split into its individual units with ``parse_unit_string``,
    each one is looked up with ``get_unit_from_df`` and the results are
    combined into a single Unit.

    Parameters
    ----------
    unit : str
        name or symbol of the unit to search for.  None and the empty string
        give the "unknown" unit.
    allow_none : bool, optional
        If the unit isn't found return a unit of unknowns instead of raising,
        by default True

    Returns
    -------
    Unit
        Unit object with the unit name, symbol, description and plot_label.

    Raises
    ------
    KeyError
        If a unit is not found in the DataFrame and allow_none is False.
    TypeError
        If ``unit`` is neither a string nor None.
    ValueError
        If no unit name can be extracted from the string.
    """
    # "digital counts" is a special case: the name contains a space, which
    # parse_unit_string would treat as a separator between two units, so it
    # is looked up directly.
    if isinstance(unit, str):
        if unit.lower() in ["digital counts", "counts", "digital count"]:
            return get_unit_from_df("digital counts", allow_none=allow_none)
    if unit in [None, ""]:
        return Unit(
            name="unknown",
            description="unknown",
            symbol="unknown",
            plot_label="Unknown",
        )

    units_parts = parse_unit_string(unit)
    if not units_parts:
        raise ValueError("No unit found in the unit string.")

    # Start from the first unit and fold the remaining ones in, using the
    # separator stored with each entry.
    combined = get_unit_from_df(units_parts[0]["name"], allow_none=allow_none)
    for entry in units_parts[1:]:
        combined = combined.combine(
            get_unit_from_df(entry["name"], allow_none=allow_none),
            separator=entry["sep"],
        )
    return combined
def get_unit_from_df(value: str, allow_none=True) -> Unit:
    """
    Build a Unit from the UNITS_DF row that matches a unit's name or symbol.

    Parameters
    ----------
    value : str
        The name or symbol of the unit to search for.
    allow_none : bool, optional
        If True (default) an unmatched value logs a warning and returns the
        "unknown" unit; if False a KeyError is raised instead.

    Returns
    -------
    Unit
        Unit built from the first matching row of UNITS_DF, or the "unknown"
        unit when nothing matches and allow_none is True.

    Raises
    ------
    KeyError
        If the unit is not found in the DataFrame and allow_none is False.
    """
    # Names are compared case-insensitively, symbols case-sensitively so that
    # prefixes are handled correctly
    # (e.g., 'mV' should match milliVolt, not megaVolt)
    matches = UNITS_DF[
        (UNITS_DF["name"].str.lower() == value.lower()) | (UNITS_DF["symbol"] == value)
    ]

    # If no exact match, try case-insensitive symbol match only for single-character inputs
    # This handles cases like 'M' -> 'meter' while preserving 'ft' as unknown (not 'fT')
    if matches.empty and len(value) == 1:
        matches = UNITS_DF[UNITS_DF["symbol"].str.lower() == value.lower()]

    if not matches.empty:
        # Several rows can match; the first one wins.
        return Unit(**matches.iloc[0].to_dict())

    if not allow_none:
        raise KeyError(
            f"Unit '{value}' not found in the UNITS_DF DataFrame. "
            "If the units are real an need to be added raise an issue to add the unit."
        )

    logger.warning(
        f"Unit '{value}' not found in accepted units, setting to 'unknown'. "
        "If this is an error raise an issue to add a unit. If an error needs "
        "to be raised, set allow_none=False."
    )
    return Unit(
        name="unknown",
        description="unknown",
        symbol="unknown",
        plot_label="Unknown",
    )