Coverage for C: \ Users \ peaco \ OneDrive \ Documents \ GitHub \ mt_metadata \ mt_metadata \ base \ metadata.py: 83%
429 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-10 00:11 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-10 00:11 -0800
1# -*- coding: utf-8 -*-
2"""
3Created on Wed Dec 23 20:41:16 2020
5:copyright:
6 Jared Peacock (jpeacock@usgs.gov)
8:license: MIT
10"""
11from __future__ import annotations
13import json
14from collections import OrderedDict
15from enum import Enum
17# =============================================================================
18# Imports
19# =============================================================================
20from operator import itemgetter
21from pathlib import Path
22from typing import Any, Mapping
23from xml.etree import cElementTree as et
25import numpy as np
26import pandas as pd
27from loguru import logger
28from pydantic import (
29 BaseModel,
30 computed_field,
31 ConfigDict,
32 create_model,
33 field_validator,
34 model_validator,
35)
36from pydantic.fields import FieldInfo, PrivateAttr
37from typing_extensions import deprecated
39from mt_metadata import NULL_VALUES
40from mt_metadata.utils.exceptions import MTSchemaError
41from mt_metadata.utils.validators import validate_attribute, validate_name
43from . import helpers, pydantic_helpers
46# =============================================================================
47# Base class that everything else will inherit
48# =============================================================================
51@deprecated("Base is deprecated, use MetadataBase instead")
52class Base:
53 pass
56class DotNotationBaseModel(BaseModel):
57 """
58 Base model that supports dot notation for setting nested attributes.
60 This model extends Pydantic's BaseModel to allow setting nested attributes
61 using dot notation (e.g., 'location.latitude' or 'time_period.start').
62 It automatically handles both flat and nested dictionary structures.
64 Parameters
65 ----------
66 **data : Any
67 Keyword arguments representing field values. Supports both flat keys
68 and dot-notation keys for nested attributes.
70 Examples
71 --------
72 >>> model = DotNotationBaseModel(**{"location.latitude": 45.0})
73 >>> model = DotNotationBaseModel(**{"location": {"latitude": 45.0}})
74 """
76 def __init__(self, **data: Any) -> None:
77 # Process dot notation fields first
78 flat_data = {}
79 nested_data = {}
81 for key, value in data.items():
82 if "." in key:
83 # This is a dotted field, handle specially
84 self._set_nested_attribute(nested_data, key, value)
85 else:
86 # Regular field, pass to Pydantic as-is
87 if key == validate_name(self.__class__.__name__):
88 if isinstance(value, dict):
89 # If the value is a dict, we need to flatten it
90 for nested_key, nested_value in value.items():
91 if isinstance(nested_value, dict):
92 # Flatten nested dicts
93 self._set_nested_attribute(
94 nested_data, nested_key, nested_value
95 )
96 else:
97 flat_data[nested_key] = nested_value
98 else:
99 # Non-dict value for class name key should be treated as regular field
100 flat_data[key] = value
101 else:
102 flat_data[key] = value
104 # Merge the nested dict into flat dict (nested takes precedence)
105 flat_data.update(nested_data)
107 # Call parent constructor with processed data
108 super().__init__(**flat_data)
110 def _set_nested_attribute(
111 self, data_dict: dict[str, Any], dotted_key: str, value: Any
112 ) -> None:
113 """
114 Set a nested attribute in data_dict based on dotted key notation.
116 Parameters
117 ----------
118 data_dict : dict[str, Any]
119 Dictionary to populate with nested structure
120 dotted_key : str
121 Dot-separated key path (e.g., 'time_period.start')
122 value : Any
123 Value to set at the nested location
125 Examples
126 --------
127 >>> data = {}
128 >>> model._set_nested_attribute(data, "time_period.start", "2020-01-01")
129 >>> # Results in: {"time_period": {"start": "2020-01-01"}}
130 """
131 parts = dotted_key.split(".")
132 current = data_dict
134 # Navigate to the deepest level, creating dicts along the way
135 for i, part in enumerate(parts[:-1]):
136 if part not in current:
137 current[part] = {}
138 elif not isinstance(current[part], dict):
139 # Convert to dict if it's not already
140 current[part] = {}
141 current = current[part]
143 # Set the final value
144 current[parts[-1]] = value
146 def update_attribute(self, attr_name: str, attr_value: Any) -> None:
147 """
148 Update a nested attribute using dot notation.
150 Parameters
151 ----------
152 attr_name : str
153 Name of the attribute to update, supports dot notation for
154 nested attributes (e.g., 'time_period.start')
155 attr_value : Any
156 New value for the attribute
158 Raises
159 ------
160 AttributeError
161 If the attribute path does not exist
163 Examples
164 --------
165 >>> model.update_attribute("time_period.start", "2020-01-01")
166 >>> model.update_attribute("latitude", 45.0)
167 """
168 if "." not in attr_name:
169 # Directly set the attribute
170 setattr(self, attr_name, attr_value)
171 return
173 # For nested attributes, we need to navigate the object graph
174 parts = attr_name.split(".")
175 current = self
177 # Navigate to the deepest level
178 for part in parts[:-1]:
179 if not hasattr(current, part):
180 raise AttributeError(
181 f"'{type(current).__name__}' has no attribute '{part}'"
182 )
183 current = getattr(current, part)
185 # Set the final attribute
186 setattr(current, parts[-1], attr_value)
187 setattr(current, parts[-1], attr_value)
190class MetadataBase(DotNotationBaseModel):
191 """
192 Base class for all metadata objects with Pydantic validation.
194 MetadataBase extends DotNotationBaseModel (which inherits from Pydantic's
195 BaseModel) to provide automatic validation according to metadata standards.
196 It adds functionality beyond dictionaries, supporting JSON, XML, pandas
197 Series, and other formats for metadata interchange.
199 Attributes
200 ----------
201 _skip_equals : list[str]
202 Private attribute listing fields to skip in equality comparisons
203 _fields : dict[str, Any]
204 Private attribute caching field information
206 Notes
207 -----
208 - All field assignments are validated automatically via Pydantic
209 - None values are converted to appropriate defaults (empty string or 0.0)
210 - Supports nested attribute access via dot notation
211 - Thread-safe for read operations after initialization
212 """
214 model_config = ConfigDict(
215 validate_assignment=True,
216 use_attribute_docstrings=True,
217 extra="allow",
218 arbitrary_types_allowed=True, # need this for numpy and pd types
219 use_enum_values=True,
220 coerce_numbers_to_str=True,
221 )
223 _skip_equals: list[str] = PrivateAttr(["processed_date", "creation_time"])
224 _fields: dict[str, Any] = PrivateAttr(default_factory=dict)
226 @model_validator(mode="before")
227 @classmethod
228 def convert_none_to_empty(cls, values):
229 """Convert None values to empty strings or 0.0 for numeric fields, except for fields that explicitly default to None."""
230 # Ensure values is a dictionary before processing
231 if not isinstance(values, dict):
232 return values
234 for field, field_info in cls.model_fields.items():
235 # Skip conversion if the field's default is explicitly None
236 if field_info.default is None:
237 continue
239 # Only process fields that are in the input values and are None
240 if field in values and values[field] is None:
241 try:
242 annotation = field_info.annotation
243 # Convert None to empty string for str fields
244 if annotation is str:
245 values[field] = ""
246 # Convert None to 0.0 for float/int fields
247 elif annotation in (float, int):
248 values[field] = 0.0
249 except (AttributeError, TypeError):
250 # If there's any issue checking the annotation, skip conversion
251 pass
252 return values
254 @field_validator("*", mode="before")
255 @classmethod
256 def validate_none_on_assignment(cls, value: Any, info: Any) -> Any:
257 """
258 Convert None values to appropriate defaults when attributes are set.
260 This validator runs for all fields due to 'validate_assignment=True' in
261 model config. It works generically for string and numeric fields without
262 requiring subclass-specific validators.
264 Parameters
265 ----------
266 value : Any
267 The value being assigned to the field
268 info : Any
269 Pydantic validation info containing field name and metadata
271 Returns
272 -------
273 Any
274 Converted value (empty string for str, 0.0 for numeric) or original value
276 Notes
277 -----
278 - For complex types, skips conversion and lets Pydantic handle validation
279 - Does NOT convert None if the field explicitly has None as its default
280 - Conversion rules: str -> '', float/int -> 0.0
281 """
282 if value is None:
283 field_name = info.field_name
284 # Get field info from the class model fields
285 if field_name in cls.model_fields:
286 field_info = cls.model_fields[field_name]
288 # Skip conversion if the field's default is explicitly None
289 if field_info.default is None:
290 return value
292 # Only attempt conversion for primitive types
293 try:
294 # Check the annotation, handling both direct types and Annotated types
295 annotation = field_info.annotation
297 # Convert None to empty string for str fields
298 if annotation is str:
299 return ""
300 # Convert None to 0.0 for float/int fields
301 elif annotation in (float, int):
302 return 0.0
303 except (AttributeError, TypeError):
304 # If there's any issue checking the annotation, let Pydantic handle it normally
305 pass
306 return value
308 @computed_field
309 @property
310 def _class_name(self) -> str:
311 return validate_attribute(self.__class__.__name__)
313 def __str__(self) -> str:
314 """
316 :return: table describing attributes
317 :rtype: string
319 """
320 return str(self.model_dump())
322 def __repr__(self) -> str:
323 return self.to_json()
325 def __eq__(
326 self, other: "MetadataBase" | dict | str | pd.Series | et.Element
327 ) -> bool:
328 """
329 Compare this metadata object with another for equality.
331 This method supports comparison with various types by converting them
332 to MetadataBase objects first. Uses Pydantic's equality comparison after
333 loading the other object.
335 Parameters
336 ----------
337 other : MetadataBase | dict | str | pd.Series | et.Element
338 Object to compare with. Can be:
339 - Another MetadataBase instance
340 - Dictionary with metadata
341 - JSON string
342 - Pandas Series
343 - XML Element
345 Returns
346 -------
347 bool
348 True if objects are equal, False otherwise
350 Notes
351 -----
352 The following Pydantic equality logic is used:
354 if isinstance(other, BaseModel):
355 # When comparing instances of generic types for equality, as long as all field values are equal,
356 # only require their generic origin types to be equal, rather than exact type equality.
357 # This prevents headaches like MyGeneric(x=1) != MyGeneric[Any](x=1).
358 self_type = self.__pydantic_generic_metadata__['origin'] or self.__class__
359 other_type = other.__pydantic_generic_metadata__['origin'] or other.__class__
361 # Perform common checks first
362 if not (
363 self_type == other_type
364 and getattr(self, '__pydantic_private__', None) == getattr(other, '__pydantic_private__', None)
365 and self.__pydantic_extra__ == other.__pydantic_extra__
366 ):
367 return False
369 # We only want to compare pydantic fields but ignoring fields is costly.
370 # We'll perform a fast check first, and fallback only when needed
371 # See GH-7444 and GH-7825 for rationale and a performance benchmark
373 # First, do the fast (and sometimes faulty) __dict__ comparison
374 if self.__dict__ == other.__dict__:
375 # If the check above passes, then pydantic fields are equal, we can return early
376 return True
378 # We don't want to trigger unnecessary costly filtering of __dict__ on all unequal objects, so we return
379 # early if there are no keys to ignore (we would just return False later on anyway)
380 model_fields = type(self).model_fields.keys()
381 if self.__dict__.keys() <= model_fields and other.__dict__.keys() <= model_fields:
382 return False
384 # If we reach here, there are non-pydantic-fields keys, mapped to unequal values, that we need to ignore
385 # Resort to costly filtering of the __dict__ objects
386 # We use operator.itemgetter because it is much faster than dict comprehensions
387 # NOTE: Contrary to standard python class and instances, when the Model class has a default value for an
388 # attribute and the model instance doesn't have a corresponding attribute, accessing the missing attribute
389 # raises an error in BaseModel.__getattr__ instead of returning the class attribute
390 # So we can use operator.itemgetter() instead of operator.attrgetter()
391 getter = operator.itemgetter(*model_fields) if model_fields else lambda _: _utils._SENTINEL
392 try:
393 return getter(self.__dict__) == getter(other.__dict__)
394 except KeyError:
395 # In rare cases (such as when using the deprecated BaseModel.copy() method),
396 # the __dict__ may not contain all model fields, which is how we can get here.
397 # getter(self.__dict__) is much faster than any 'safe' method that accounts
398 # for missing keys, and wrapping it in a `try` doesn't slow things down much
399 # in the common case.
400 self_fields_proxy = _utils.SafeGetItemProxy(self.__dict__)
401 other_fields_proxy = _utils.SafeGetItemProxy(other.__dict__)
402 return getter(self_fields_proxy) == getter(other_fields_proxy)
404 # other instance is not a BaseModel
405 else:
406 return NotImplemented # delegate to the other item in the comparison
407 """
408 if other in [None]:
409 return False
411 elif isinstance(other, (dict, str, pd.Series, et.Element)):
412 try:
413 # Attempt to load the other object into a new instance of MetadataBase
414 # This will ensure that the other object has the same attributes as self
415 other_obj = __class__().load(other)
416 except Exception as e:
417 logger.error(
418 f"Failed to load other object of type {type(other)}: {other}. Error is: {e} "
419 )
420 return False
421 if not other_obj:
422 return False
424 if hasattr(other_obj, "to_dict") and callable(other_obj.to_dict):
425 other_dict = other_obj.to_dict(single=True, required=False)
426 else:
427 return False
429 elif isinstance(other, MetadataBase):
430 other_dict = other.to_dict(single=True, required=False)
431 else:
432 raise ValueError(
433 f"Cannot compare {self.__class__.__name__} with {type(other)}"
434 )
435 home_dict = self.to_dict(single=True, required=False)
436 try:
437 if home_dict == other_dict:
438 return True
439 except ValueError:
440 # Handle numpy arrays in dictionaries which cannot be directly compared
441 pass
443 equals = True
444 for key, value in home_dict.items():
445 skip_key_bool = False
446 for skip_key in self._skip_equals:
447 if skip_key in key:
448 skip_key_bool = True
449 if skip_key_bool:
450 continue
451 try:
452 other_value = other_dict[key]
453 if isinstance(value, np.ndarray):
454 if value.size != other_value.size:
455 msg = f"Array sizes for {key} differ: {value.size} != {other_value.size}"
456 logger.info(msg)
457 equals = False
458 continue
459 if not (value == other_value).all():
460 msg = f"{key}: {value} != {other_value}"
461 logger.info(msg)
462 equals = False
463 elif isinstance(value, (float, int, complex)):
464 # Handle None values in numeric comparisons
465 if other_value is None or value is None:
466 # Special case for coordinate fields: treat None and 0.0 as equal
467 coordinate_fields = ["x", "y", "z", "x2", "y2", "z2"]
468 is_coordinate_field = any(
469 key.endswith(f".{coord}") for coord in coordinate_fields
470 )
472 if is_coordinate_field and (
473 (value is None and other_value == 0.0)
474 or (value == 0.0 and other_value is None)
475 ):
476 # Coordinate fields: None and 0.0 are considered equivalent
477 continue
478 elif value != other_value:
479 msg = f"{key}: {value} != {other_value}"
480 logger.info(msg)
481 equals = False
482 elif not np.isclose(value, other_value):
483 msg = f"{key}: {value} != {other_value}"
484 logger.info(msg)
485 equals = False
486 else:
487 if value in NULL_VALUES and other_value in NULL_VALUES:
488 continue
489 if value != other_value:
490 msg = f"{key}: {value} != {other_value}"
491 logger.info(msg)
492 equals = False
493 except KeyError:
494 msg = "Cannot find {0} in other".format(key)
495 logger.info(msg)
497 return equals
499 def __ne__(
500 self, other: "MetadataBase" | dict | str | pd.Series | et.Element
501 ) -> bool:
502 """
503 Compare this metadata object with another for inequality.
505 Parameters
506 ----------
507 other : MetadataBase | dict | str | pd.Series | et.Element
508 Object to compare with
510 Returns
511 -------
512 bool
513 True if objects are not equal, False otherwise
514 """
515 return not self.__eq__(other)
517 def __len__(self) -> int:
518 """
519 Return the number of attributes in this metadata object.
521 Returns
522 -------
523 int
524 Number of attributes (including nested attributes)
525 """
526 return len(self.get_attribute_list())
528 def load(self, other: "MetadataBase" | dict | str | pd.Series | et.Element) -> None:
529 """
530 Load metadata from various formats and populate attributes.
532 The other object should have the same attributes as the current object.
533 If there are different attributes, validation may not be accurate.
534 Consider making a new model if you need a different object structure.
536 Parameters
537 ----------
538 other : MetadataBase | dict | str | pd.Series | et.Element
539 Source object from which to fill attributes. Supported types:
540 - MetadataBase: Another metadata instance
541 - dict: Dictionary with metadata
542 - str: JSON string representation
543 - pd.Series: Pandas Series with metadata
544 - et.Element: XML Element with metadata
546 Raises
547 ------
548 MTSchemaError
549 If the input type is not supported
551 Examples
552 --------
553 >>> metadata = MetadataBase()
554 >>> metadata.load({"latitude": 45.0, "longitude": -120.0})
555 >>> metadata.load('{"latitude": 45.0}')
556 """
557 if isinstance(other, MetadataBase):
558 self.update(other)
559 elif isinstance(other, dict):
560 self.from_dict(other)
561 elif isinstance(other, str):
562 if other.lower() in NULL_VALUES:
563 return
564 self.from_json(other)
565 elif isinstance(other, pd.Series):
566 self.from_series(other)
567 elif isinstance(other, et.Element):
568 self.from_xml(other)
569 else:
570 msg = f"Cannot load {type(other)} into {self.__class__.__name__}"
571 logger.error(msg)
572 raise MTSchemaError(msg)
574 def update(self, other: "MetadataBase", match: list[str] = []) -> None:
575 """
576 Update attribute values from another like element, skipping None
578 Parameters
579 ----------
580 other : MetadataBase
581 other Base object from which to update attributes
582 """
583 if not isinstance(other, type(self)):
584 # Allow updates between compatible metadata classes (e.g. enhanced vs original)
585 if not (
586 hasattr(other, "__class__")
587 and hasattr(self, "__class__")
588 and other.__class__.__name__ == self.__class__.__name__
589 ):
590 logger.warning(f"Cannot update {type(self)} with {type(other)}")
591 return
592 for k in match:
593 if self.get_attr_from_name(k) != other.get_attr_from_name(k):
594 msg = (
595 f"{k} is not equal {self.get_attr_from_name(k)} != "
596 f"{other.get_attr_from_name(k)}"
597 )
598 logger.error(msg)
599 raise ValueError(msg)
600 for k, v in other.to_dict(single=True).items():
601 if hasattr(v, "size"):
602 if v.size > 0:
603 self.update_attribute(k, v)
604 else:
605 if (
606 v
607 not in [None, 0.0, [], "", "1980-01-01T00:00:00+00:00"]
608 + NULL_VALUES
609 ):
610 self.update_attribute(k, v)
612 ## cannot override the __deepcopy__ method in pydantic.BaseModel otherwise bad
613 ## things happen
614 def copy(
615 self, update: Mapping[str, Any] | None = None, deep: bool = True
616 ) -> "MetadataBase":
617 """
618 Create a copy of the current metadata object.
620 This is a wrapper around Pydantic's copy method with special handling
621 for non-copyable objects like HDF5 references. Non-copyable objects
622 are set to None in the copied object.
624 Parameters
625 ----------
626 update : Mapping[str, Any] | None, optional
627 Values to change/add in the new model. Note: the data is not
628 validated before creating the new model, so ensure it's trustworthy.
629 Default is None.
630 deep : bool, optional
631 If True, create a deep copy of the object. Default is True.
633 Returns
634 -------
635 MetadataBase
636 A copy of the current object with updates applied
638 Raises
639 ------
640 TypeError
641 If the object contains non-copyable objects and fallback fails
643 Notes
644 -----
645 - HDF5 references cannot be deep copied and will be set to None
646 - If deep copy fails, falls back to dictionary-based copying
648 Examples
649 --------
650 >>> original = MetadataBase(latitude=45.0)
651 >>> copy = original.copy(update={"latitude": 46.0})
652 """
654 # Handle HDF5 references and other non-copyable objects
655 if update is None:
656 update = {}
657 else:
658 update = dict(update) # Convert to mutable dict
660 # Check for HDF5 references that cannot be deep copied
661 if deep and hasattr(self, "hdf5_reference"):
662 hdf5_ref = getattr(self, "hdf5_reference", None)
663 if hdf5_ref is not None:
664 # Set to None to avoid deepcopy issues
665 update["hdf5_reference"] = None
667 # Also check for any other MTH5-specific fields that might not be copyable
668 if hasattr(self, "mth5_type"):
669 mth5_type_value = getattr(self, "mth5_type", None)
670 # Only preserve mth5_type if it has a valid non-None value
671 if mth5_type_value is not None:
672 update["mth5_type"] = mth5_type_value
674 try:
675 copied_obj = self.model_copy(update=update, deep=deep)
676 except (TypeError, AttributeError) as e:
677 if "no default __reduce__" in str(e) or "__cinit__" in str(e):
678 # Fallback: create a new instance from dictionary representation
679 # This avoids any non-copyable objects entirely
680 self_dict = self.to_dict()
681 new_instance = type(self)()
682 new_instance.from_dict(self_dict)
684 # Apply any updates
685 for key, value in update.items():
686 if hasattr(new_instance, key):
687 setattr(new_instance, key, value)
689 return new_instance
690 else:
691 # Re-raise if it's a different error
692 raise
694 return copied_obj
696 def get_all_fields(self) -> dict:
697 """
698 Get all field attributes in the Metadata class. Will
699 search recursively and return dotted keys. For
700 instance `{location.latitude: ...}`.
702 Returns
703 -------
704 Dict
705 A flattened dictionary of dotted keys of all attributes
706 within the class.
707 """
709 if not self._fields:
710 self._fields = pydantic_helpers.flatten_field_tree_map(
711 pydantic_helpers.get_all_fields_serializable(self)
712 )
713 return self._fields
715 def get_attribute_list(self) -> list[str]:
716 """
717 return a list of the attributes
719 Returns
720 -------
721 list[str]
722 A list of attribute names
723 """
725 return sorted(self.get_all_fields().keys())
727 @property
728 def _required_fields(self) -> list[str]:
729 """
730 Get a list of required fields according to metadata standards.
732 There is a distinction between "required" in Pydantic (must be defined
733 on instantiation) and "required" in metadata standards (must be present
734 in the standard even if the value is None).
736 Returns
737 -------
738 list[str]
739 List of required field names in the metadata standards
741 Notes
742 -----
743 Required fields are determined by the 'required' flag in field metadata,
744 not by Pydantic's required_on_init behavior.
745 """
746 required_fields = []
747 for name, field_dict in self.get_all_fields().items():
748 required = field_dict.get("required", False)
749 if required:
750 required_fields.append(name)
752 return required_fields
754 def _field_info_to_string(self, name: str, field_dict: dict[str, Any]) -> str:
755 """
756 Create a formatted string from field information for pretty printing.
758 Parameters
759 ----------
760 name : str
761 Name of the field
762 field_dict : dict[str, Any]
763 Dictionary containing field metadata (type, description, units, etc.)
765 Returns
766 -------
767 str
768 Formatted string representation of the field information
770 Examples
771 --------
772 >>> info = {"type": "float", "description": "Latitude", "units": "degrees"}
773 >>> result = obj._field_info_to_string("latitude", info)
774 """
776 line = [f"{name}:"]
778 for key, value in field_dict.items():
779 line.append(f"\t{key}: {value}")
781 return "\n".join(line)
783 def attribute_information(self, name: str | None = None) -> None:
784 """
785 Print descriptive information about attributes.
787 If name is provided, prints information for that specific attribute.
788 Otherwise, prints information for all attributes.
790 Parameters
791 ----------
792 name : str | None, optional
793 Attribute name for a specific attribute. If None, prints information
794 for all attributes. Default is None.
796 Raises
797 ------
798 MTSchemaError
799 If the specified attribute name is not found
801 Examples
802 --------
803 >>> metadata.attribute_information("latitude")
804 >>> metadata.attribute_information() # Print all attributes
805 """
806 attr_dict = self.get_all_fields()
807 lines = []
808 if name:
809 try:
810 v_dict = attr_dict[name]
811 except KeyError as error:
812 msg = f"{error} not attribute {name} found."
813 logger.error(msg)
814 raise MTSchemaError(msg)
815 lines.append(self._field_info_to_string(name, v_dict))
816 else:
817 lines = []
818 for name, v_dict in attr_dict.items():
819 lines.append(self._field_info_to_string(name, v_dict))
820 lines.append("=" * 50)
821 print("\n".join(lines))
823 def get_attr_from_name(self, name: str) -> Any:
824 """
825 Access attribute from the given name, supporting dot notation.
827 The name can contain nested object references separated by dots,
828 e.g., 'location.latitude' or 'time_period.start'.
830 Parameters
831 ----------
832 name : str
833 Name of attribute to get, may include dots for nested attributes
835 Returns
836 -------
837 Any
838 The attribute value
840 Raises
841 ------
842 KeyError
843 If the attribute is not found
844 AttributeError
845 If the attribute path is invalid
847 Examples
848 --------
849 >>> metadata = MetadataBase(**{'location.latitude': 45.0})
850 >>> metadata.get_attr_from_name('location.latitude')
851 45.0
853 Notes
854 -----
855 This is a helper function for names with '.' for easier access when
856 reading from dictionaries or other flat structures.
857 """
858 value, _ = helpers.recursive_split_getattr(self, name)
859 return value
861 @deprecated(
862 "set_attr_from_name will be deprecated in the future. Use update_attribute."
863 )
864 def set_attr_from_name(self, name: str, value: Any) -> None:
865 """
866 Helper function to set attribute from the given name.
868 The name can contain the name of an object which must be separated
869 by a '.' for e.g. {object_name}.{name} --> location.latitude
871 .. note:: this is a helper function for names with '.' in the name for
872 easier getting when reading from dictionary.
874 :param name: name of attribute to get.
875 :type name: string
876 :param value: attribute value
877 :type value: type is defined by the attribute name
879 :Example:
881 >>> b = Base(**{'category.test_attr':10})
882 >>> b.set_attr_from_name('category.test_attr', '10')
883 >>> print(b.category.test_attr)
884 '10'
885 """
887 @deprecated("add_base_attribute is deprecated. Use add_new_field.")
888 def add_base_attribute(
889 self,
890 ):
891 pass
893 def add_new_field(self, name: str, new_field_info: FieldInfo) -> BaseModel:
894 """
895 This is going to be much different from older versions of mt_metadata.
897 This will return a new BaseModel with the added attribute. Going to use
898 `pydantid.create_model` from the exsiting attribute information and the
899 added attribute.
901 Add an attribute to _attr_dict so it will be included in the
902 output dictionary
904 Parameters
905 ----------
906 name : str
907 name of attribute
908 new_field_info : FieldInfo
909 value of the new attribute
911 Returns
912 -------
913 BaseModel
914 A new BaseModel instance with the added attribute.
916 Should include:
918 * annotated --> the data type [ str | int | float | bool ]
919 * required --> required in the standards [ True | False ]
920 * units --> units of the attribute, must be a string
921 * alias --> other possible names for the attribute
922 * options --> if only a few options are accepted, separated by | or
923 comma.b [ option_01 | option_02 | other ]. 'other' means other options
924 available but not yet defined.
925 * example --> an example of the attribute
927 :Example:
929 .. code-block:: python
931 from pydantic.fields import FieldInfo
932 new_field = FieldInfo(
933 annotated=str,
934 default="default_value",
935 required=False,
936 description="new field description",
937 alias="new_field_alias",
938 json_schema_extra={"units":"km"}
939 )
941 existing_basemodel = MetadataBase()
942 new_basemodel = existing_basemodel.add_new_field("new_attribute", new_field)
943 new_basemodel_object = new_basemodel()
945 """
946 existing_model_fields = self.__pydantic_fields__.copy()
947 existing_model_fields[name] = new_field_info
948 all_fields = {k: (v.annotation, v) for k, v in existing_model_fields.items()}
950 return create_model( # type: ignore
951 self.__class__.__name__, # Preserve the original class name
952 __base__=self.__class__, # Preserve the original class hierarchy
953 **all_fields,
954 )
956 def to_dict(
957 self, nested: bool = False, single: bool = False, required: bool = True
958 ) -> dict[str, Any]:
959 """
960 Convert metadata to a dictionary representation.
962 Parameters
963 ----------
964 nested : bool, optional
965 If True, return a nested dictionary structure. If False, use
966 dot-notation for nested keys. Default is False.
967 single : bool, optional
968 If True, return just the metadata dictionary without the class name
969 wrapper (meta_dict[class_name]). Default is False.
970 required : bool, optional
971 If True, return only required elements and elements with non-None
972 values. If False, include all fields. Default is True.
974 Returns
975 -------
976 dict[str, Any]
977 Dictionary representation of the metadata
979 Notes
980 -----
981 - Comment objects are converted to simple strings for backward compatibility
982 when they only contain a value (no author or custom timestamp)
983 - Numpy arrays, Enums, and nested MetadataBase objects are handled specially
984 - Required fields are always included even if None
986 Examples
987 --------
988 >>> metadata.to_dict(nested=True, single=True)
989 >>> metadata.to_dict(required=False) # Include all fields
990 """
992 meta_dict = {}
994 # Keep track of processed comment attributes to avoid duplication
995 processed_comments = set()
997 for name in self.get_attribute_list():
998 # Special handling for comment attributes for backwards compatibility
999 if (
1000 ".value" in name
1001 and name.replace(".value", "") not in processed_comments
1002 ):
1003 base_attr_name = name.replace(".value", "")
1004 # Check if this is a comment attribute
1005 try:
1006 comment_obj = self.get_attr_from_name(base_attr_name)
1007 if (
1008 hasattr(comment_obj, "__class__")
1009 and comment_obj.__class__.__name__ == "Comment"
1010 ):
1011 # Check if this is a simple comment (only value set, no author or custom timestamp)
1012 default_timestamp = "1980-01-01T00:00:00+00:00"
1013 is_simple_comment = (
1014 hasattr(comment_obj, "value")
1015 and comment_obj.value is not None
1016 and isinstance(comment_obj.value, str)
1017 and (
1018 not hasattr(comment_obj, "author")
1019 or comment_obj.author is None
1020 or comment_obj.author == ""
1021 )
1022 and (
1023 not hasattr(comment_obj, "time_stamp")
1024 or comment_obj.time_stamp is None
1025 or str(comment_obj.time_stamp) == default_timestamp
1026 )
1027 )
1029 if is_simple_comment and not nested:
1030 # Use simple string format for backwards compatibility
1031 if required:
1032 if comment_obj.value not in [
1033 None,
1034 "1980-01-01T00:00:00+00:00",
1035 "1980",
1036 [],
1037 "",
1038 ]:
1039 meta_dict[base_attr_name] = str(comment_obj.value)
1040 else:
1041 meta_dict[base_attr_name] = str(comment_obj.value)
1043 # Mark this comment as processed to skip its nested attributes
1044 processed_comments.add(base_attr_name)
1045 continue
1046 else:
1047 # Use nested format - let individual attributes be processed normally
1048 pass
1049 except (AttributeError, KeyError):
1050 # Not a comment object or attribute doesn't exist, process normally
1051 pass
1053 # Skip nested comment attributes if we already processed the base comment
1054 skip_attribute = False
1055 for processed_comment in processed_comments:
1056 if name.startswith(processed_comment + "."):
1057 skip_attribute = True
1058 break
1060 if skip_attribute:
1061 continue
1063 try:
1064 value = self.get_attr_from_name(name)
1065 # Special handling for Comment objects for backwards compatibility
1066 if (
1067 hasattr(value, "__class__")
1068 and value.__class__.__name__ == "Comment"
1069 ):
1070 # Check if this is a simple comment (only value set, no author or custom timestamp)
1071 default_timestamp = "1980-01-01T00:00:00+00:00"
1072 is_simple_comment = (
1073 hasattr(value, "value")
1074 and value.value is not None
1075 and isinstance(value.value, str)
1076 and (
1077 not hasattr(value, "author")
1078 or value.author is None
1079 or value.author == ""
1080 )
1081 and (
1082 not hasattr(value, "time_stamp")
1083 or value.time_stamp is None
1084 or str(value.time_stamp) == default_timestamp
1085 )
1086 )
1088 if is_simple_comment and not nested:
1089 # Return simple string for backwards compatibility
1090 value = str(value.value)
1091 else:
1092 # Return full nested format
1093 value = value.to_dict(nested=nested, required=required)
1094 elif hasattr(value, "to_dict"):
1095 value = value.to_dict(nested=nested, required=required)
1096 elif isinstance(value, dict):
1097 for key, obj in value.items():
1098 if hasattr(obj, "to_dict"):
1099 value[key] = obj.to_dict(nested=nested, required=required)
1100 elif isinstance(obj, Enum):
1101 value[key] = obj.value
1102 else:
1103 value[key] = obj
1104 elif isinstance(value, list):
1105 v_list = []
1106 for obj in value:
1107 if hasattr(obj, "to_dict"):
1108 v_list.append(obj.to_dict(nested=nested, required=required))
1109 elif isinstance(obj, Enum):
1110 v_list.append(obj.value)
1111 else:
1112 v_list.append(obj)
1113 value = v_list
1114 elif isinstance(value, Enum):
1115 value = value.value
1116 elif hasattr(value, "unicode_string"):
1117 value = value.unicode_string()
1118 elif isinstance(value, (str, int, float, bool)):
1119 value = value
1120 except AttributeError as error:
1121 logger.debug(error)
1122 value = None
1123 if required:
1124 if isinstance(value, (np.ndarray)):
1125 if name == "zeros" or name == "poles":
1126 meta_dict[name] = value
1127 elif value.all() != 0:
1128 meta_dict[name] = value
1129 elif hasattr(value, "size"):
1130 if value.size > 0:
1131 meta_dict[name] = value
1132 elif (
1133 value not in [None, "1980-01-01T00:00:00+00:00", "1980", [], ""]
1134 or name in self._required_fields
1135 or helpers._should_include_coordinate_field(name)
1136 or helpers._should_convert_none_to_empty_string(name)
1137 ):
1138 # Convert None coordinate fields to 0.0 for backward compatibility
1139 if helpers._should_include_coordinate_field(name) and value is None:
1140 value = 0.0
1141 # Convert None string fields to empty string for backward compatibility
1142 elif (
1143 helpers._should_convert_none_to_empty_string(name)
1144 and value is None
1145 ):
1146 value = ""
1147 meta_dict[name] = value
1148 else:
1149 meta_dict[name] = value
1150 if nested:
1151 meta_dict = helpers.structure_dict(meta_dict)
1152 meta_dict = {
1153 validate_name(self.__class__.__name__): OrderedDict(
1154 sorted(meta_dict.items(), key=itemgetter(0))
1155 )
1156 }
1158 if single:
1159 meta_dict = meta_dict[list(meta_dict.keys())[0]]
1160 return meta_dict
1162 def from_dict(self, meta_dict: dict, skip_none: bool = False) -> None:
1163 """
1164 Fill attributes from a dictionary.
1166 The dictionary can be nested or flat with dot-notation keys. If the
1167 dictionary has a single key matching the class name, it will be
1168 unwrapped automatically.
1170 Parameters
1171 ----------
1172 meta_dict : dict
1173 Dictionary with keys equal to metadata attribute names. Supports
1174 both nested dictionaries and flat dictionaries with dot-notation keys.
1175 skip_none : bool, optional
1176 If True, skip attributes with None values. Default is False.
1178 Raises
1179 ------
1180 MTSchemaError
1181 If the input is not a valid dictionary
1183 Examples
1184 --------
1185 >>> metadata.from_dict({"latitude": 45.0, "longitude": -120.0})
1186 >>> metadata.from_dict({"location": {"latitude": 45.0}})
1187 """
1188 if not isinstance(meta_dict, (dict, OrderedDict)):
1189 msg = f"Input must be a dictionary not {type(meta_dict)}"
1190 logger.error(msg)
1191 raise MTSchemaError(msg)
1192 keys = list(meta_dict.keys())
1193 if len(keys) == 1:
1194 if isinstance(meta_dict[keys[0]], (dict, OrderedDict)):
1195 class_name = keys[0]
1196 if class_name.lower() != validate_name(self.__class__.__name__):
1197 msg = (
1198 "name of input dictionary is not the same as class type "
1199 f"input = {class_name}, class type = {self.__class__.__name__}"
1200 )
1201 logger.debug(msg, class_name, self.__class__.__name__)
1202 meta_dict = helpers.flatten_dict(meta_dict[class_name])
1203 else:
1204 meta_dict = helpers.flatten_dict(meta_dict)
1206 else:
1207 logger.debug(
1208 f"Assuming input dictionary is of type {self.__class__.__name__}",
1209 )
1210 meta_dict = helpers.flatten_dict(meta_dict)
1211 # set attributes by key.
1212 for name, value in meta_dict.items():
1213 if skip_none:
1214 if value in NULL_VALUES:
1215 continue
1216 self.update_attribute(name, value)
1218 def to_json(
1219 self, nested: bool = False, indent: str = " " * 4, required: bool = True
1220 ) -> str:
1221 """
1222 Write a json string from a given object, taking into account other
1223 class objects contained within the given object.
1225 Parameters
1226 ----------
1227 indent : str
1228 indentation for the json string, default is 4 spaces
1230 nested : bool
1231 make the returned json nested
1232 required : bool
1233 return just the required elements and any elements with non-None values
1235 Returns
1236 -------
1237 str
1238 json string representation of the object
1240 """
1242 return json.dumps(
1243 self.to_dict(nested=nested, required=required),
1244 cls=helpers.NumpyEncoder,
1245 indent=indent,
1246 )
1248 def from_json(self, json_str: str | Path) -> None:
1249 """
1250 read in a json string and update attributes of an object
1252 Parameters
1253 ----------
1254 json_str : str | Path
1255 json string or file path to json file
1257 """
1258 if isinstance(json_str, str):
1259 try:
1260 json_path = Path(json_str)
1261 if json_path.exists():
1262 with open(json_path, "r") as fid:
1263 json_dict = json.load(fid)
1264 except OSError:
1265 pass
1266 json_dict = json.loads(json_str)
1267 elif isinstance(json_str, Path):
1268 if json_str.exists():
1269 with open(json_str, "r") as fid:
1270 json_dict = json.load(fid)
1271 elif not isinstance(json_str, (str, Path)):
1272 msg = f"Input must be valid JSON string not {type(json_str)}"
1273 logger.error(msg)
1274 raise MTSchemaError(msg)
1275 self.from_dict(json_dict)
1277 def from_series(self, pd_series: pd.Series) -> None:
1278 """
1279 Fill attributes from a Pandas Series.
1281 Parameters
1282 ----------
1283 pd_series : pd.Series
1284 Series containing metadata information. The series must be single
1285 layered with key names separated by dots for nested attributes
1286 (e.g., 'location.latitude').
1288 Raises
1289 ------
1290 MTSchemaError
1291 If the input is not a Pandas Series
1293 Examples
1294 --------
1295 >>> series = pd.Series({"latitude": 45.0, "longitude": -120.0})
1296 >>> metadata.from_series(series)
1298 Notes
1299 -----
1300 Types are not currently enforced from the series - validation occurs
1301 via Pydantic after assignment.
1302 """
1303 if not isinstance(pd_series, pd.Series):
1304 msg = f"Input must be a Pandas.Series not type {type(pd_series)}"
1305 logger.error(msg)
1306 raise MTSchemaError(msg)
1307 for key, value in pd_series.items():
1308 key = str(key)
1309 self.update_attribute(key, value)
1311 def to_series(self, required: bool = True) -> pd.Series:
1312 """
1313 Convert attribute list to a pandas.Series
1315 .. note:: this is a flattened version of the metadata
1317 Parameters
1318 ----------
1319 required : bool
1320 return just the required elements and any elements with non-None values
1322 Returns
1323 -------
1324 pandas.Series
1325 Series containing the metadata information
1327 """
1329 return pd.Series(self.to_dict(single=True, required=required))
1331 def to_xml(self, string: bool = False, required: bool = True) -> str | et.Element:
1332 """
1333 Convert metadata to an XML representation.
1335 Creates an XML element with type and unit information for each attribute.
1337 Parameters
1338 ----------
1339 string : bool, optional
1340 If True, return XML as a string. If False, return an XML Element.
1341 Default is False.
1342 required : bool, optional
1343 If True, include only required elements and elements with non-None
1344 values. If False, include all elements. Default is True.
1346 Returns
1347 -------
1348 str | et.Element
1349 XML Element object if string=False, otherwise XML string
1351 Examples
1352 --------
1353 >>> xml_elem = metadata.to_xml()
1354 >>> xml_str = metadata.to_xml(string=True)
1355 """
1356 attr_dict = self.get_all_fields()
1357 element = helpers.dict_to_xml(
1358 self.to_dict(nested=True, required=required), attr_dict
1359 )
1360 if not string:
1361 return element
1362 else:
1363 return helpers.element_to_string(element)
1365 def from_xml(self, xml_element: et.Element) -> None:
1366 """
1367 Fill attributes from an XML element.
1369 Parameters
1370 ----------
1371 xml_element : et.Element
1372 XML element from which to fill attributes. The element structure
1373 should match the metadata schema.
1375 Examples
1376 --------
1377 >>> import xml.etree.ElementTree as et
1378 >>> xml_str = '<metadata><latitude>45.0</latitude></metadata>'
1379 >>> elem = et.fromstring(xml_str)
1380 >>> metadata.from_xml(elem)
1382 Notes
1383 -----
1384 The XML element is converted to a dictionary first, then loaded
1385 via the from_dict method.
1386 """
1388 self.from_dict(helpers.element_to_dict(xml_element))