Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mt_metadata\mt_metadata\base\metadata.py: 83%

429 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-10 00:11 -0800

1# -*- coding: utf-8 -*- 

2""" 

3Created on Wed Dec 23 20:41:16 2020 

4 

5:copyright: 

6 Jared Peacock (jpeacock@usgs.gov) 

7 

8:license: MIT 

9 

10""" 

11from __future__ import annotations 

12 

13import json 

14from collections import OrderedDict 

15from enum import Enum 

16 

17# ============================================================================= 

18# Imports 

19# ============================================================================= 

20from operator import itemgetter 

21from pathlib import Path 

22from typing import Any, Mapping 

23from xml.etree import cElementTree as et 

24 

25import numpy as np 

26import pandas as pd 

27from loguru import logger 

28from pydantic import ( 

29 BaseModel, 

30 computed_field, 

31 ConfigDict, 

32 create_model, 

33 field_validator, 

34 model_validator, 

35) 

36from pydantic.fields import FieldInfo, PrivateAttr 

37from typing_extensions import deprecated 

38 

39from mt_metadata import NULL_VALUES 

40from mt_metadata.utils.exceptions import MTSchemaError 

41from mt_metadata.utils.validators import validate_attribute, validate_name 

42 

43from . import helpers, pydantic_helpers 

44 

45 

46# ============================================================================= 

47# Base class that everything else will inherit 

48# ============================================================================= 

49 

50 

@deprecated("Base is deprecated, use MetadataBase instead")
class Base:
    """Empty, deprecated placeholder class; ``MetadataBase`` is the replacement."""

54 

55 

class DotNotationBaseModel(BaseModel):
    """
    Base model that supports dot notation for setting nested attributes.

    This model extends Pydantic's BaseModel to allow setting nested attributes
    using dot notation (e.g., 'location.latitude' or 'time_period.start').
    It accepts both flat and nested dictionary structures, including a payload
    wrapped under the validated class name.

    Parameters
    ----------
    **data : Any
        Keyword arguments representing field values. Supports both flat keys
        and dot-notation keys for nested attributes.

    Examples
    --------
    >>> model = DotNotationBaseModel(**{"location.latitude": 45.0})
    >>> model = DotNotationBaseModel(**{"location": {"latitude": 45.0}})
    """

    def __init__(self, **data: Any) -> None:
        # Key under which a payload may be wrapped: {"<class key>": {...}}.
        # Computed once; it does not change while iterating over ``data``.
        class_key = validate_name(self.__class__.__name__)

        flat_data: dict[str, Any] = {}
        nested_data: dict[str, Any] = {}

        for key, value in data.items():
            if "." in key:
                # Dotted key: expand into the nested structure.
                self._set_nested_attribute(nested_data, key, value)
            elif key == class_key and isinstance(value, dict):
                # Payload wrapped under the class name: unwrap one level.
                for nested_key, nested_value in value.items():
                    if isinstance(nested_value, dict) or "." in nested_key:
                        # Dict values and dotted keys are both expanded, so a
                        # wrapped payload behaves like an unwrapped one.
                        self._set_nested_attribute(
                            nested_data, nested_key, nested_value
                        )
                    else:
                        flat_data[nested_key] = nested_value
            else:
                # Regular field (including a non-dict value stored under the
                # class-name key): pass to Pydantic as-is.
                flat_data[key] = value

        # Merge the nested dict into flat dict (nested takes precedence)
        flat_data.update(nested_data)

        # Call parent constructor with processed data
        super().__init__(**flat_data)

    def _set_nested_attribute(
        self, data_dict: dict[str, Any], dotted_key: str, value: Any
    ) -> None:
        """
        Set a nested attribute in data_dict based on dotted key notation.

        Intermediate levels are created as empty dicts; an existing
        non-dict value found at an intermediate level is replaced by a dict.

        Parameters
        ----------
        data_dict : dict[str, Any]
            Dictionary to populate with nested structure (modified in place)
        dotted_key : str
            Dot-separated key path (e.g., 'time_period.start')
        value : Any
            Value to set at the nested location

        Examples
        --------
        >>> data = {}
        >>> model._set_nested_attribute(data, "time_period.start", "2020-01-01")
        >>> # Results in: {"time_period": {"start": "2020-01-01"}}
        """
        *parents, leaf = dotted_key.split(".")
        current = data_dict

        # Walk down to the deepest level, creating dicts along the way.
        for part in parents:
            if not isinstance(current.get(part), dict):
                current[part] = {}
            current = current[part]

        current[leaf] = value

    def update_attribute(self, attr_name: str, attr_value: Any) -> None:
        """
        Update an attribute, using dot notation for nested attributes.

        Parameters
        ----------
        attr_name : str
            Name of the attribute to update, supports dot notation for
            nested attributes (e.g., 'time_period.start')
        attr_value : Any
            New value for the attribute

        Raises
        ------
        AttributeError
            If an intermediate attribute on the path does not exist

        Examples
        --------
        >>> model.update_attribute("time_period.start", "2020-01-01")
        >>> model.update_attribute("latitude", 45.0)
        """
        *parents, leaf = attr_name.split(".")
        current = self

        # Navigate the object graph down to the owner of the final attribute.
        # For an undotted name ``parents`` is empty and ``current`` stays self.
        for part in parents:
            if not hasattr(current, part):
                raise AttributeError(
                    f"'{type(current).__name__}' has no attribute '{part}'"
                )
            current = getattr(current, part)

        # Assign exactly once so assignment validation is not run twice.
        setattr(current, leaf, attr_value)

188 

189 

190class MetadataBase(DotNotationBaseModel): 

191 """ 

192 Base class for all metadata objects with Pydantic validation. 

193 

194 MetadataBase extends DotNotationBaseModel (which inherits from Pydantic's 

195 BaseModel) to provide automatic validation according to metadata standards. 

196 It adds functionality beyond dictionaries, supporting JSON, XML, pandas 

197 Series, and other formats for metadata interchange. 

198 

199 Attributes 

200 ---------- 

201 _skip_equals : list[str] 

202 Private attribute listing fields to skip in equality comparisons 

203 _fields : dict[str, Any] 

204 Private attribute caching field information 

205 

206 Notes 

207 ----- 

208 - All field assignments are validated automatically via Pydantic 

209 - None values are converted to appropriate defaults (empty string or 0.0) 

210 - Supports nested attribute access via dot notation 

211 - Thread-safe for read operations after initialization 

212 """ 

213 

    # Pydantic configuration applied to this model class.
    model_config = ConfigDict(
        validate_assignment=True,  # re-validate values on attribute assignment
        use_attribute_docstrings=True,  # take field descriptions from attribute docstrings
        extra="allow",  # accept keys that are not declared fields
        arbitrary_types_allowed=True,  # need this for numpy and pd types
        use_enum_values=True,  # store an enum member's value instead of the member
        coerce_numbers_to_str=True,  # numbers given to str fields are converted to str
    )

    # Substrings of keys that __eq__ skips in its key-by-key comparison.
    _skip_equals: list[str] = PrivateAttr(["processed_date", "creation_time"])
    # Cache filled lazily by get_all_fields() with the flattened field map.
    _fields: dict[str, Any] = PrivateAttr(default_factory=dict)

225 

226 @model_validator(mode="before") 

227 @classmethod 

228 def convert_none_to_empty(cls, values): 

229 """Convert None values to empty strings or 0.0 for numeric fields, except for fields that explicitly default to None.""" 

230 # Ensure values is a dictionary before processing 

231 if not isinstance(values, dict): 

232 return values 

233 

234 for field, field_info in cls.model_fields.items(): 

235 # Skip conversion if the field's default is explicitly None 

236 if field_info.default is None: 

237 continue 

238 

239 # Only process fields that are in the input values and are None 

240 if field in values and values[field] is None: 

241 try: 

242 annotation = field_info.annotation 

243 # Convert None to empty string for str fields 

244 if annotation is str: 

245 values[field] = "" 

246 # Convert None to 0.0 for float/int fields 

247 elif annotation in (float, int): 

248 values[field] = 0.0 

249 except (AttributeError, TypeError): 

250 # If there's any issue checking the annotation, skip conversion 

251 pass 

252 return values 

253 

254 @field_validator("*", mode="before") 

255 @classmethod 

256 def validate_none_on_assignment(cls, value: Any, info: Any) -> Any: 

257 """ 

258 Convert None values to appropriate defaults when attributes are set. 

259 

260 This validator runs for all fields due to 'validate_assignment=True' in 

261 model config. It works generically for string and numeric fields without 

262 requiring subclass-specific validators. 

263 

264 Parameters 

265 ---------- 

266 value : Any 

267 The value being assigned to the field 

268 info : Any 

269 Pydantic validation info containing field name and metadata 

270 

271 Returns 

272 ------- 

273 Any 

274 Converted value (empty string for str, 0.0 for numeric) or original value 

275 

276 Notes 

277 ----- 

278 - For complex types, skips conversion and lets Pydantic handle validation 

279 - Does NOT convert None if the field explicitly has None as its default 

280 - Conversion rules: str -> '', float/int -> 0.0 

281 """ 

282 if value is None: 

283 field_name = info.field_name 

284 # Get field info from the class model fields 

285 if field_name in cls.model_fields: 

286 field_info = cls.model_fields[field_name] 

287 

288 # Skip conversion if the field's default is explicitly None 

289 if field_info.default is None: 

290 return value 

291 

292 # Only attempt conversion for primitive types 

293 try: 

294 # Check the annotation, handling both direct types and Annotated types 

295 annotation = field_info.annotation 

296 

297 # Convert None to empty string for str fields 

298 if annotation is str: 

299 return "" 

300 # Convert None to 0.0 for float/int fields 

301 elif annotation in (float, int): 

302 return 0.0 

303 except (AttributeError, TypeError): 

304 # If there's any issue checking the annotation, let Pydantic handle it normally 

305 pass 

306 return value 

307 

308 @computed_field 

309 @property 

310 def _class_name(self) -> str: 

311 return validate_attribute(self.__class__.__name__) 

312 

313 def __str__(self) -> str: 

314 """ 

315 

316 :return: table describing attributes 

317 :rtype: string 

318 

319 """ 

320 return str(self.model_dump()) 

321 

322 def __repr__(self) -> str: 

323 return self.to_json() 

324 

325 def __eq__( 

326 self, other: "MetadataBase" | dict | str | pd.Series | et.Element 

327 ) -> bool: 

328 """ 

329 Compare this metadata object with another for equality. 

330 

331 This method supports comparison with various types by converting them 

332 to MetadataBase objects first. Uses Pydantic's equality comparison after 

333 loading the other object. 

334 

335 Parameters 

336 ---------- 

337 other : MetadataBase | dict | str | pd.Series | et.Element 

338 Object to compare with. Can be: 

339 - Another MetadataBase instance 

340 - Dictionary with metadata 

341 - JSON string 

342 - Pandas Series 

343 - XML Element 

344 

345 Returns 

346 ------- 

347 bool 

348 True if objects are equal, False otherwise 

349 

350 Notes 

351 ----- 

352 The following Pydantic equality logic is used: 

353 

354 if isinstance(other, BaseModel): 

355 # When comparing instances of generic types for equality, as long as all field values are equal, 

356 # only require their generic origin types to be equal, rather than exact type equality. 

357 # This prevents headaches like MyGeneric(x=1) != MyGeneric[Any](x=1). 

358 self_type = self.__pydantic_generic_metadata__['origin'] or self.__class__ 

359 other_type = other.__pydantic_generic_metadata__['origin'] or other.__class__ 

360 

361 # Perform common checks first 

362 if not ( 

363 self_type == other_type 

364 and getattr(self, '__pydantic_private__', None) == getattr(other, '__pydantic_private__', None) 

365 and self.__pydantic_extra__ == other.__pydantic_extra__ 

366 ): 

367 return False 

368 

369 # We only want to compare pydantic fields but ignoring fields is costly. 

370 # We'll perform a fast check first, and fallback only when needed 

371 # See GH-7444 and GH-7825 for rationale and a performance benchmark 

372 

373 # First, do the fast (and sometimes faulty) __dict__ comparison 

374 if self.__dict__ == other.__dict__: 

375 # If the check above passes, then pydantic fields are equal, we can return early 

376 return True 

377 

378 # We don't want to trigger unnecessary costly filtering of __dict__ on all unequal objects, so we return 

379 # early if there are no keys to ignore (we would just return False later on anyway) 

380 model_fields = type(self).model_fields.keys() 

381 if self.__dict__.keys() <= model_fields and other.__dict__.keys() <= model_fields: 

382 return False 

383 

384 # If we reach here, there are non-pydantic-fields keys, mapped to unequal values, that we need to ignore 

385 # Resort to costly filtering of the __dict__ objects 

386 # We use operator.itemgetter because it is much faster than dict comprehensions 

387 # NOTE: Contrary to standard python class and instances, when the Model class has a default value for an 

388 # attribute and the model instance doesn't have a corresponding attribute, accessing the missing attribute 

389 # raises an error in BaseModel.__getattr__ instead of returning the class attribute 

390 # So we can use operator.itemgetter() instead of operator.attrgetter() 

391 getter = operator.itemgetter(*model_fields) if model_fields else lambda _: _utils._SENTINEL 

392 try: 

393 return getter(self.__dict__) == getter(other.__dict__) 

394 except KeyError: 

395 # In rare cases (such as when using the deprecated BaseModel.copy() method), 

396 # the __dict__ may not contain all model fields, which is how we can get here. 

397 # getter(self.__dict__) is much faster than any 'safe' method that accounts 

398 # for missing keys, and wrapping it in a `try` doesn't slow things down much 

399 # in the common case. 

400 self_fields_proxy = _utils.SafeGetItemProxy(self.__dict__) 

401 other_fields_proxy = _utils.SafeGetItemProxy(other.__dict__) 

402 return getter(self_fields_proxy) == getter(other_fields_proxy) 

403 

404 # other instance is not a BaseModel 

405 else: 

406 return NotImplemented # delegate to the other item in the comparison 

407 """ 

408 if other in [None]: 

409 return False 

410 

411 elif isinstance(other, (dict, str, pd.Series, et.Element)): 

412 try: 

413 # Attempt to load the other object into a new instance of MetadataBase 

414 # This will ensure that the other object has the same attributes as self 

415 other_obj = __class__().load(other) 

416 except Exception as e: 

417 logger.error( 

418 f"Failed to load other object of type {type(other)}: {other}. Error is: {e} " 

419 ) 

420 return False 

421 if not other_obj: 

422 return False 

423 

424 if hasattr(other_obj, "to_dict") and callable(other_obj.to_dict): 

425 other_dict = other_obj.to_dict(single=True, required=False) 

426 else: 

427 return False 

428 

429 elif isinstance(other, MetadataBase): 

430 other_dict = other.to_dict(single=True, required=False) 

431 else: 

432 raise ValueError( 

433 f"Cannot compare {self.__class__.__name__} with {type(other)}" 

434 ) 

435 home_dict = self.to_dict(single=True, required=False) 

436 try: 

437 if home_dict == other_dict: 

438 return True 

439 except ValueError: 

440 # Handle numpy arrays in dictionaries which cannot be directly compared 

441 pass 

442 

443 equals = True 

444 for key, value in home_dict.items(): 

445 skip_key_bool = False 

446 for skip_key in self._skip_equals: 

447 if skip_key in key: 

448 skip_key_bool = True 

449 if skip_key_bool: 

450 continue 

451 try: 

452 other_value = other_dict[key] 

453 if isinstance(value, np.ndarray): 

454 if value.size != other_value.size: 

455 msg = f"Array sizes for {key} differ: {value.size} != {other_value.size}" 

456 logger.info(msg) 

457 equals = False 

458 continue 

459 if not (value == other_value).all(): 

460 msg = f"{key}: {value} != {other_value}" 

461 logger.info(msg) 

462 equals = False 

463 elif isinstance(value, (float, int, complex)): 

464 # Handle None values in numeric comparisons 

465 if other_value is None or value is None: 

466 # Special case for coordinate fields: treat None and 0.0 as equal 

467 coordinate_fields = ["x", "y", "z", "x2", "y2", "z2"] 

468 is_coordinate_field = any( 

469 key.endswith(f".{coord}") for coord in coordinate_fields 

470 ) 

471 

472 if is_coordinate_field and ( 

473 (value is None and other_value == 0.0) 

474 or (value == 0.0 and other_value is None) 

475 ): 

476 # Coordinate fields: None and 0.0 are considered equivalent 

477 continue 

478 elif value != other_value: 

479 msg = f"{key}: {value} != {other_value}" 

480 logger.info(msg) 

481 equals = False 

482 elif not np.isclose(value, other_value): 

483 msg = f"{key}: {value} != {other_value}" 

484 logger.info(msg) 

485 equals = False 

486 else: 

487 if value in NULL_VALUES and other_value in NULL_VALUES: 

488 continue 

489 if value != other_value: 

490 msg = f"{key}: {value} != {other_value}" 

491 logger.info(msg) 

492 equals = False 

493 except KeyError: 

494 msg = "Cannot find {0} in other".format(key) 

495 logger.info(msg) 

496 

497 return equals 

498 

499 def __ne__( 

500 self, other: "MetadataBase" | dict | str | pd.Series | et.Element 

501 ) -> bool: 

502 """ 

503 Compare this metadata object with another for inequality. 

504 

505 Parameters 

506 ---------- 

507 other : MetadataBase | dict | str | pd.Series | et.Element 

508 Object to compare with 

509 

510 Returns 

511 ------- 

512 bool 

513 True if objects are not equal, False otherwise 

514 """ 

515 return not self.__eq__(other) 

516 

517 def __len__(self) -> int: 

518 """ 

519 Return the number of attributes in this metadata object. 

520 

521 Returns 

522 ------- 

523 int 

524 Number of attributes (including nested attributes) 

525 """ 

526 return len(self.get_attribute_list()) 

527 

528 def load(self, other: "MetadataBase" | dict | str | pd.Series | et.Element) -> None: 

529 """ 

530 Load metadata from various formats and populate attributes. 

531 

532 The other object should have the same attributes as the current object. 

533 If there are different attributes, validation may not be accurate. 

534 Consider making a new model if you need a different object structure. 

535 

536 Parameters 

537 ---------- 

538 other : MetadataBase | dict | str | pd.Series | et.Element 

539 Source object from which to fill attributes. Supported types: 

540 - MetadataBase: Another metadata instance 

541 - dict: Dictionary with metadata 

542 - str: JSON string representation 

543 - pd.Series: Pandas Series with metadata 

544 - et.Element: XML Element with metadata 

545 

546 Raises 

547 ------ 

548 MTSchemaError 

549 If the input type is not supported 

550 

551 Examples 

552 -------- 

553 >>> metadata = MetadataBase() 

554 >>> metadata.load({"latitude": 45.0, "longitude": -120.0}) 

555 >>> metadata.load('{"latitude": 45.0}') 

556 """ 

557 if isinstance(other, MetadataBase): 

558 self.update(other) 

559 elif isinstance(other, dict): 

560 self.from_dict(other) 

561 elif isinstance(other, str): 

562 if other.lower() in NULL_VALUES: 

563 return 

564 self.from_json(other) 

565 elif isinstance(other, pd.Series): 

566 self.from_series(other) 

567 elif isinstance(other, et.Element): 

568 self.from_xml(other) 

569 else: 

570 msg = f"Cannot load {type(other)} into {self.__class__.__name__}" 

571 logger.error(msg) 

572 raise MTSchemaError(msg) 

573 

574 def update(self, other: "MetadataBase", match: list[str] = []) -> None: 

575 """ 

576 Update attribute values from another like element, skipping None 

577 

578 Parameters 

579 ---------- 

580 other : MetadataBase 

581 other Base object from which to update attributes 

582 """ 

583 if not isinstance(other, type(self)): 

584 # Allow updates between compatible metadata classes (e.g. enhanced vs original) 

585 if not ( 

586 hasattr(other, "__class__") 

587 and hasattr(self, "__class__") 

588 and other.__class__.__name__ == self.__class__.__name__ 

589 ): 

590 logger.warning(f"Cannot update {type(self)} with {type(other)}") 

591 return 

592 for k in match: 

593 if self.get_attr_from_name(k) != other.get_attr_from_name(k): 

594 msg = ( 

595 f"{k} is not equal {self.get_attr_from_name(k)} != " 

596 f"{other.get_attr_from_name(k)}" 

597 ) 

598 logger.error(msg) 

599 raise ValueError(msg) 

600 for k, v in other.to_dict(single=True).items(): 

601 if hasattr(v, "size"): 

602 if v.size > 0: 

603 self.update_attribute(k, v) 

604 else: 

605 if ( 

606 v 

607 not in [None, 0.0, [], "", "1980-01-01T00:00:00+00:00"] 

608 + NULL_VALUES 

609 ): 

610 self.update_attribute(k, v) 

611 

612 ## cannot override the __deepcopy__ method in pydantic.BaseModel otherwise bad 

613 ## things happen 

614 def copy( 

615 self, update: Mapping[str, Any] | None = None, deep: bool = True 

616 ) -> "MetadataBase": 

617 """ 

618 Create a copy of the current metadata object. 

619 

620 This is a wrapper around Pydantic's copy method with special handling 

621 for non-copyable objects like HDF5 references. Non-copyable objects 

622 are set to None in the copied object. 

623 

624 Parameters 

625 ---------- 

626 update : Mapping[str, Any] | None, optional 

627 Values to change/add in the new model. Note: the data is not 

628 validated before creating the new model, so ensure it's trustworthy. 

629 Default is None. 

630 deep : bool, optional 

631 If True, create a deep copy of the object. Default is True. 

632 

633 Returns 

634 ------- 

635 MetadataBase 

636 A copy of the current object with updates applied 

637 

638 Raises 

639 ------ 

640 TypeError 

641 If the object contains non-copyable objects and fallback fails 

642 

643 Notes 

644 ----- 

645 - HDF5 references cannot be deep copied and will be set to None 

646 - If deep copy fails, falls back to dictionary-based copying 

647 

648 Examples 

649 -------- 

650 >>> original = MetadataBase(latitude=45.0) 

651 >>> copy = original.copy(update={"latitude": 46.0}) 

652 """ 

653 

654 # Handle HDF5 references and other non-copyable objects 

655 if update is None: 

656 update = {} 

657 else: 

658 update = dict(update) # Convert to mutable dict 

659 

660 # Check for HDF5 references that cannot be deep copied 

661 if deep and hasattr(self, "hdf5_reference"): 

662 hdf5_ref = getattr(self, "hdf5_reference", None) 

663 if hdf5_ref is not None: 

664 # Set to None to avoid deepcopy issues 

665 update["hdf5_reference"] = None 

666 

667 # Also check for any other MTH5-specific fields that might not be copyable 

668 if hasattr(self, "mth5_type"): 

669 mth5_type_value = getattr(self, "mth5_type", None) 

670 # Only preserve mth5_type if it has a valid non-None value 

671 if mth5_type_value is not None: 

672 update["mth5_type"] = mth5_type_value 

673 

674 try: 

675 copied_obj = self.model_copy(update=update, deep=deep) 

676 except (TypeError, AttributeError) as e: 

677 if "no default __reduce__" in str(e) or "__cinit__" in str(e): 

678 # Fallback: create a new instance from dictionary representation 

679 # This avoids any non-copyable objects entirely 

680 self_dict = self.to_dict() 

681 new_instance = type(self)() 

682 new_instance.from_dict(self_dict) 

683 

684 # Apply any updates 

685 for key, value in update.items(): 

686 if hasattr(new_instance, key): 

687 setattr(new_instance, key, value) 

688 

689 return new_instance 

690 else: 

691 # Re-raise if it's a different error 

692 raise 

693 

694 return copied_obj 

695 

696 def get_all_fields(self) -> dict: 

697 """ 

698 Get all field attributes in the Metadata class. Will 

699 search recursively and return dotted keys. For 

700 instance `{location.latitude: ...}`. 

701 

702 Returns 

703 ------- 

704 Dict 

705 A flattened dictionary of dotted keys of all attributes 

706 within the class. 

707 """ 

708 

709 if not self._fields: 

710 self._fields = pydantic_helpers.flatten_field_tree_map( 

711 pydantic_helpers.get_all_fields_serializable(self) 

712 ) 

713 return self._fields 

714 

715 def get_attribute_list(self) -> list[str]: 

716 """ 

717 return a list of the attributes 

718 

719 Returns 

720 ------- 

721 list[str] 

722 A list of attribute names 

723 """ 

724 

725 return sorted(self.get_all_fields().keys()) 

726 

727 @property 

728 def _required_fields(self) -> list[str]: 

729 """ 

730 Get a list of required fields according to metadata standards. 

731 

732 There is a distinction between "required" in Pydantic (must be defined 

733 on instantiation) and "required" in metadata standards (must be present 

734 in the standard even if the value is None). 

735 

736 Returns 

737 ------- 

738 list[str] 

739 List of required field names in the metadata standards 

740 

741 Notes 

742 ----- 

743 Required fields are determined by the 'required' flag in field metadata, 

744 not by Pydantic's required_on_init behavior. 

745 """ 

746 required_fields = [] 

747 for name, field_dict in self.get_all_fields().items(): 

748 required = field_dict.get("required", False) 

749 if required: 

750 required_fields.append(name) 

751 

752 return required_fields 

753 

754 def _field_info_to_string(self, name: str, field_dict: dict[str, Any]) -> str: 

755 """ 

756 Create a formatted string from field information for pretty printing. 

757 

758 Parameters 

759 ---------- 

760 name : str 

761 Name of the field 

762 field_dict : dict[str, Any] 

763 Dictionary containing field metadata (type, description, units, etc.) 

764 

765 Returns 

766 ------- 

767 str 

768 Formatted string representation of the field information 

769 

770 Examples 

771 -------- 

772 >>> info = {"type": "float", "description": "Latitude", "units": "degrees"} 

773 >>> result = obj._field_info_to_string("latitude", info) 

774 """ 

775 

776 line = [f"{name}:"] 

777 

778 for key, value in field_dict.items(): 

779 line.append(f"\t{key}: {value}") 

780 

781 return "\n".join(line) 

782 

783 def attribute_information(self, name: str | None = None) -> None: 

784 """ 

785 Print descriptive information about attributes. 

786 

787 If name is provided, prints information for that specific attribute. 

788 Otherwise, prints information for all attributes. 

789 

790 Parameters 

791 ---------- 

792 name : str | None, optional 

793 Attribute name for a specific attribute. If None, prints information 

794 for all attributes. Default is None. 

795 

796 Raises 

797 ------ 

798 MTSchemaError 

799 If the specified attribute name is not found 

800 

801 Examples 

802 -------- 

803 >>> metadata.attribute_information("latitude") 

804 >>> metadata.attribute_information() # Print all attributes 

805 """ 

806 attr_dict = self.get_all_fields() 

807 lines = [] 

808 if name: 

809 try: 

810 v_dict = attr_dict[name] 

811 except KeyError as error: 

812 msg = f"{error} not attribute {name} found." 

813 logger.error(msg) 

814 raise MTSchemaError(msg) 

815 lines.append(self._field_info_to_string(name, v_dict)) 

816 else: 

817 lines = [] 

818 for name, v_dict in attr_dict.items(): 

819 lines.append(self._field_info_to_string(name, v_dict)) 

820 lines.append("=" * 50) 

821 print("\n".join(lines)) 

822 

823 def get_attr_from_name(self, name: str) -> Any: 

824 """ 

825 Access attribute from the given name, supporting dot notation. 

826 

827 The name can contain nested object references separated by dots, 

828 e.g., 'location.latitude' or 'time_period.start'. 

829 

830 Parameters 

831 ---------- 

832 name : str 

833 Name of attribute to get, may include dots for nested attributes 

834 

835 Returns 

836 ------- 

837 Any 

838 The attribute value 

839 

840 Raises 

841 ------ 

842 KeyError 

843 If the attribute is not found 

844 AttributeError 

845 If the attribute path is invalid 

846 

847 Examples 

848 -------- 

849 >>> metadata = MetadataBase(**{'location.latitude': 45.0}) 

850 >>> metadata.get_attr_from_name('location.latitude') 

851 45.0 

852 

853 Notes 

854 ----- 

855 This is a helper function for names with '.' for easier access when 

856 reading from dictionaries or other flat structures. 

857 """ 

858 value, _ = helpers.recursive_split_getattr(self, name) 

859 return value 

860 

861 @deprecated( 

862 "set_attr_from_name will be deprecated in the future. Use update_attribute." 

863 ) 

864 def set_attr_from_name(self, name: str, value: Any) -> None: 

865 """ 

866 Helper function to set attribute from the given name. 

867 

868 The name can contain the name of an object which must be separated 

869 by a '.' for e.g. {object_name}.{name} --> location.latitude 

870 

871 .. note:: this is a helper function for names with '.' in the name for 

872 easier getting when reading from dictionary. 

873 

874 :param name: name of attribute to get. 

875 :type name: string 

876 :param value: attribute value 

877 :type value: type is defined by the attribute name 

878 

879 :Example: 

880 

881 >>> b = Base(**{'category.test_attr':10}) 

882 >>> b.set_attr_from_name('category.test_attr', '10') 

883 >>> print(b.category.test_attr) 

884 '10' 

885 """ 

886 

887 @deprecated("add_base_attribute is deprecated. Use add_new_field.") 

888 def add_base_attribute( 

889 self, 

890 ): 

891 pass 

892 

893 def add_new_field(self, name: str, new_field_info: FieldInfo) -> BaseModel: 

894 """ 

895 This is going to be much different from older versions of mt_metadata. 

896 

897 This will return a new BaseModel with the added attribute. Going to use 

898 `pydantid.create_model` from the exsiting attribute information and the 

899 added attribute. 

900 

901 Add an attribute to _attr_dict so it will be included in the 

902 output dictionary 

903 

904 Parameters 

905 ---------- 

906 name : str 

907 name of attribute 

908 new_field_info : FieldInfo 

909 value of the new attribute 

910 

911 Returns 

912 ------- 

913 BaseModel 

914 A new BaseModel instance with the added attribute. 

915 

916 Should include: 

917 

918 * annotated --> the data type [ str | int | float | bool ] 

919 * required --> required in the standards [ True | False ] 

920 * units --> units of the attribute, must be a string 

921 * alias --> other possible names for the attribute 

922 * options --> if only a few options are accepted, separated by | or 

923 comma.b [ option_01 | option_02 | other ]. 'other' means other options 

924 available but not yet defined. 

925 * example --> an example of the attribute 

926 

927 :Example: 

928 

929 .. code-block:: python 

930 

931 from pydantic.fields import FieldInfo 

932 new_field = FieldInfo( 

933 annotated=str, 

934 default="default_value", 

935 required=False, 

936 description="new field description", 

937 alias="new_field_alias", 

938 json_schema_extra={"units":"km"} 

939 ) 

940 

941 existing_basemodel = MetadataBase() 

942 new_basemodel = existing_basemodel.add_new_field("new_attribute", new_field) 

943 new_basemodel_object = new_basemodel() 

944 

945 """ 

946 existing_model_fields = self.__pydantic_fields__.copy() 

947 existing_model_fields[name] = new_field_info 

948 all_fields = {k: (v.annotation, v) for k, v in existing_model_fields.items()} 

949 

950 return create_model( # type: ignore 

951 self.__class__.__name__, # Preserve the original class name 

952 __base__=self.__class__, # Preserve the original class hierarchy 

953 **all_fields, 

954 ) 

955 

def to_dict(
    self, nested: bool = False, single: bool = False, required: bool = True
) -> dict[str, Any]:
    """
    Convert metadata to a dictionary representation.

    Parameters
    ----------
    nested : bool, optional
        If True, return a nested dictionary structure. If False, use
        dot-notation for nested keys. Default is False.
    single : bool, optional
        If True, return just the metadata dictionary without the class name
        wrapper (meta_dict[class_name]). Default is False.
    required : bool, optional
        If True, return only required elements and elements with non-empty
        values. If False, include all fields. Default is True.

    Returns
    -------
    dict[str, Any]
        Dictionary representation of the metadata, keys sorted by name and
        wrapped in ``{validated_class_name: ...}`` unless ``single`` is True.

    Notes
    -----
    - Comment objects are written as plain strings for backward compatibility
      when they only carry a string value (no author, default or no
      time stamp) and ``nested`` is False.
    - Dict and list values are converted into *new* containers; the
      containers returned by ``get_attr_from_name`` are never modified.
    - With ``required=True`` numpy arrays are kept when named "zeros" or
      "poles" or when ``value.all() != 0``.
    - An ``AttributeError`` raised while reading or converting an attribute
      is logged at debug level and the value is treated as None.

    Examples
    --------
    >>> metadata.to_dict(nested=True, single=True)
    >>> metadata.to_dict(required=False)  # Include all fields
    """
    default_timestamp = "1980-01-01T00:00:00+00:00"
    # Values considered "unset" when only required/filled fields are wanted.
    empty_values = [None, default_timestamp, "1980", [], ""]

    def _is_comment(obj) -> bool:
        # Matched by class name, exactly as before, so no import is needed.
        return obj.__class__.__name__ == "Comment"

    def _is_simple_comment(obj) -> bool:
        # A comment is "simple" when it has a string value, no author and
        # either no time stamp or the default one.
        if not isinstance(getattr(obj, "value", None), str):
            return False
        if hasattr(obj, "author"):
            if not (obj.author is None or obj.author == ""):
                return False
        if hasattr(obj, "time_stamp"):
            if not (
                obj.time_stamp is None
                or str(obj.time_stamp) == default_timestamp
            ):
                return False
        return True

    def _serialize_item(obj):
        # Convert a single element of a dict/list value.
        if hasattr(obj, "to_dict"):
            return obj.to_dict(nested=nested, required=required)
        if isinstance(obj, Enum):
            return obj.value
        return obj

    meta_dict = {}

    # Comment attributes already written as a plain string; their remaining
    # dotted sub-attributes must be skipped to avoid duplication.
    processed_comments = set()

    for name in self.get_attribute_list():
        # Backwards compatibility: collapse "<comment>.value" into "<comment>"
        if ".value" in name:
            base_attr_name = name.replace(".value", "")
            if base_attr_name not in processed_comments:
                try:
                    comment_obj = self.get_attr_from_name(base_attr_name)
                    if (
                        _is_comment(comment_obj)
                        and _is_simple_comment(comment_obj)
                        and not nested
                    ):
                        if not required or comment_obj.value not in empty_values:
                            meta_dict[base_attr_name] = str(comment_obj.value)
                        processed_comments.add(base_attr_name)
                        continue
                except (AttributeError, KeyError):
                    # Not a comment object or attribute doesn't exist,
                    # fall through and process the name normally.
                    pass

        # Skip sub-attributes of comments that were written as a string.
        if any(name.startswith(done + ".") for done in processed_comments):
            continue

        try:
            value = self.get_attr_from_name(name)
            if _is_comment(value):
                if _is_simple_comment(value) and not nested:
                    value = str(value.value)
                else:
                    value = value.to_dict(nested=nested, required=required)
            elif hasattr(value, "to_dict"):
                value = value.to_dict(nested=nested, required=required)
            elif isinstance(value, dict):
                # Build a new dict: writing back into ``value`` would alter
                # the dict returned by get_attr_from_name, which may be the
                # object's live attribute.
                value = {key: _serialize_item(obj) for key, obj in value.items()}
            elif isinstance(value, list):
                value = [_serialize_item(obj) for obj in value]
            elif isinstance(value, Enum):
                value = value.value
            elif hasattr(value, "unicode_string"):
                value = value.unicode_string()
        except AttributeError as error:
            logger.debug(error)
            value = None

        if not required:
            meta_dict[name] = value
            continue

        if isinstance(value, np.ndarray):
            if name in ("zeros", "poles") or value.all() != 0:
                meta_dict[name] = value
        elif hasattr(value, "size"):
            if value.size > 0:
                meta_dict[name] = value
        elif (
            value not in empty_values
            or name in self._required_fields
            or helpers._should_include_coordinate_field(name)
            or helpers._should_convert_none_to_empty_string(name)
        ):
            if value is None:
                # Backward compatibility: None coordinates become 0.0 and
                # selected None string fields become "".
                if helpers._should_include_coordinate_field(name):
                    value = 0.0
                elif helpers._should_convert_none_to_empty_string(name):
                    value = ""
            meta_dict[name] = value

    if nested:
        meta_dict = helpers.structure_dict(meta_dict)

    meta_dict = {
        validate_name(self.__class__.__name__): OrderedDict(
            sorted(meta_dict.items(), key=itemgetter(0))
        )
    }

    if single:
        # Only one key exists at this point: the validated class name.
        meta_dict = next(iter(meta_dict.values()))
    return meta_dict

1161 

def from_dict(self, meta_dict: dict, skip_none: bool = False) -> None:
    """
    Fill attributes from a dictionary.

    The dictionary can be nested or flat with dot-notation keys. If the
    dictionary has a single key whose value is itself a dictionary, that
    key is treated as the class-name wrapper and is unwrapped; a mismatch
    with this class's validated name is only logged at debug level.

    Parameters
    ----------
    meta_dict : dict
        Dictionary with keys equal to metadata attribute names. Supports
        both nested dictionaries and flat dictionaries with dot-notation keys.
    skip_none : bool, optional
        If True, skip attributes whose value is in ``NULL_VALUES``.
        Default is False.

    Raises
    ------
    MTSchemaError
        If the input is not a dictionary

    Examples
    --------
    >>> metadata.from_dict({"latitude": 45.0, "longitude": -120.0})
    >>> metadata.from_dict({"location": {"latitude": 45.0}})
    """
    # OrderedDict is a dict subclass, so a single isinstance check suffices.
    if not isinstance(meta_dict, dict):
        msg = f"Input must be a dictionary not {type(meta_dict)}"
        logger.error(msg)
        raise MTSchemaError(msg)

    keys = list(meta_dict.keys())
    if len(keys) == 1 and isinstance(meta_dict[keys[0]], dict):
        class_name = keys[0]
        # str() guards against non-string keys, which have no .lower()
        if str(class_name).lower() != validate_name(self.__class__.__name__):
            msg = (
                "name of input dictionary is not the same as class type "
                f"input = {class_name}, class type = {self.__class__.__name__}"
            )
            # The message is already interpolated; passing extra positional
            # arguments would make the logger re-format it.
            logger.debug(msg)
        meta_dict = helpers.flatten_dict(meta_dict[class_name])
    else:
        if len(keys) != 1:
            logger.debug(
                f"Assuming input dictionary is of type {self.__class__.__name__}",
            )
        meta_dict = helpers.flatten_dict(meta_dict)

    # set attributes by key.
    for name, value in meta_dict.items():
        if skip_none:
            try:
                if value in NULL_VALUES:
                    continue
            except (TypeError, ValueError):
                # Array-like values cannot be reduced to a single truth
                # value when compared; such a value is not a null marker.
                pass
        self.update_attribute(name, value)

1217 

def to_json(
    self, nested: bool = False, indent: str = " " * 4, required: bool = True
) -> str:
    """
    Serialize the object to a JSON string.

    The dictionary produced by ``to_dict`` is encoded with
    ``helpers.NumpyEncoder``.

    Parameters
    ----------
    nested : bool
        make the returned json nested
    indent : str
        indentation for the json string
    required : bool
        return just the required elements and any elements with non-None
        values

    Returns
    -------
    str
        json string representation of the object

    """
    payload = self.to_dict(nested=nested, required=required)
    return json.dumps(payload, cls=helpers.NumpyEncoder, indent=indent)

1247 

def from_json(self, json_str: str | Path) -> None:
    """
    Read a JSON string or JSON file and update attributes of the object.

    Parameters
    ----------
    json_str : str | Path
        JSON string, or path to a JSON file. A ``str`` that names an
        existing file is read as a file; any other ``str`` is parsed as
        JSON text.

    Raises
    ------
    MTSchemaError
        If the input is neither a ``str`` nor a ``Path``, or if a ``Path``
        is given that does not exist.

    """
    if isinstance(json_str, str):
        try:
            is_file = Path(json_str).is_file()
        except (OSError, ValueError):
            # e.g. the text is too long or malformed to be a file name,
            # so it can only be JSON content.
            is_file = False
        if is_file:
            with open(json_str, "r") as fid:
                json_dict = json.load(fid)
        else:
            # Parse as JSON text only when the string is not a file path;
            # parsing a path string would always fail.
            json_dict = json.loads(json_str)
    elif isinstance(json_str, Path):
        if not json_str.exists():
            msg = f"Could not find JSON file {json_str}"
            logger.error(msg)
            raise MTSchemaError(msg)
        with open(json_str, "r") as fid:
            json_dict = json.load(fid)
    else:
        msg = f"Input must be valid JSON string not {type(json_str)}"
        logger.error(msg)
        raise MTSchemaError(msg)
    self.from_dict(json_dict)

1276 

def from_series(self, pd_series: pd.Series) -> None:
    """
    Fill attributes from a Pandas Series.

    Each index label is converted to ``str`` and passed with its value to
    ``update_attribute``.

    Parameters
    ----------
    pd_series : pd.Series
        Series containing metadata information. The series must be single
        layered with key names separated by dots for nested attributes
        (e.g., 'location.latitude').

    Raises
    ------
    MTSchemaError
        If the input is not a Pandas Series

    Examples
    --------
    >>> series = pd.Series({"latitude": 45.0, "longitude": -120.0})
    >>> metadata.from_series(series)
    """
    if isinstance(pd_series, pd.Series):
        for label, entry in pd_series.items():
            self.update_attribute(str(label), entry)
        return

    msg = f"Input must be a Pandas.Series not type {type(pd_series)}"
    logger.error(msg)
    raise MTSchemaError(msg)

1310 

def to_series(self, required: bool = True) -> pd.Series:
    """
    Convert the metadata to a pandas.Series.

    .. note:: this is a flattened version of the metadata, built from
        ``to_dict(single=True)``.

    Parameters
    ----------
    required : bool
        return just the required elements and any elements with non-None
        values

    Returns
    -------
    pandas.Series
        Series containing the metadata information

    """
    flat_metadata = self.to_dict(single=True, required=required)
    return pd.Series(flat_metadata)

1330 

def to_xml(self, string: bool = False, required: bool = True) -> str | et.Element:
    """
    Convert metadata to an XML representation.

    The nested dictionary from ``to_dict`` and the field information from
    ``get_all_fields`` are handed to ``helpers.dict_to_xml``.

    Parameters
    ----------
    string : bool, optional
        If True, return XML as a string. If False, return an XML Element.
        Default is False.
    required : bool, optional
        If True, include only required elements and elements with non-None
        values. If False, include all elements. Default is True.

    Returns
    -------
    str | et.Element
        XML Element object if string=False, otherwise XML string

    Examples
    --------
    >>> xml_elem = metadata.to_xml()
    >>> xml_str = metadata.to_xml(string=True)
    """
    field_lookup = self.get_all_fields()
    xml_root = helpers.dict_to_xml(
        self.to_dict(nested=True, required=required), field_lookup
    )
    if string:
        return helpers.element_to_string(xml_root)
    return xml_root

1364 

def from_xml(self, xml_element: et.Element) -> None:
    """
    Fill attributes from an XML element.

    Parameters
    ----------
    xml_element : et.Element
        XML element from which to fill attributes. The element structure
        should match the metadata schema.

    Examples
    --------
    >>> import xml.etree.ElementTree as et
    >>> xml_str = '<metadata><latitude>45.0</latitude></metadata>'
    >>> elem = et.fromstring(xml_str)
    >>> metadata.from_xml(elem)

    Notes
    -----
    The element is first converted with ``helpers.element_to_dict`` and the
    resulting dictionary is loaded through ``from_dict``.
    """
    element_as_dict = helpers.element_to_dict(xml_element)
    self.from_dict(element_as_dict)