Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mt_metadata\mt_metadata\utils\validators.py: 50%
300 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-10 00:11 -0800
1# -*- coding: utf-8 -*-
2"""
3=======================
4schema
5=======================
7Convenience Classes and Functions to deal with the base metadata standards
8described by the csv file.
10The hope is that only the csv files will need to be changed as the standards
11are modified. The attribute dictionaries are stored in ATTRICT
13Created on Wed Apr 29 11:11:31 2020
15@author: jpeacock
16"""
17import re
19# =============================================================================
20# Imports
21# =============================================================================
22import sys
23from collections.abc import Iterable
25import numpy as np
26from loguru import logger
27from pydantic import HttpUrl
29from mt_metadata import ACCEPTED_STYLES, REQUIRED_KEYS
30from mt_metadata.utils.exceptions import MTSchemaError, MTValidatorError
33# from mt_metadata.common.comment import Comment
36# =============================================================================
37# validator functions
38# =============================================================================
def validate_doi(value: str | HttpUrl | None) -> HttpUrl | None:
    """
    Validate a DOI and normalize it to a resolver URL.

    Bare DOIs (``10.xxxx/...``) and ``doi:`` prefixed strings are expanded to
    ``https://doi.org/...`` before being converted to an ``HttpUrl``.

    Parameters
    ----------
    value : str | HttpUrl | None
        The DOI to validate.  ``None`` and the empty string mean "no DOI".

    Returns
    -------
    HttpUrl | None
        The DOI as a URL, or None if no DOI was given.

    Raises
    ------
    ValueError
        If the value is not a string, HttpUrl or None, or if the resulting
        URL does not start with ``https://doi.org/`` or
        ``https://dx.doi.org/``.
    """
    if value is None:
        return None

    if isinstance(value, str):
        if value == "":
            return None
        if value.startswith("10."):
            value = f"https://doi.org/{value}"
        elif value.startswith("doi:"):
            # Strip only the leading scheme; str.replace would also remove
            # any later "doi:" that is part of the identifier itself.
            value = f"https://doi.org/{value.removeprefix('doi:')}"
        value = HttpUrl(value)
    elif not isinstance(value, HttpUrl):
        # Fail with the documented exception type instead of an
        # AttributeError on the .unicode_string() call below.
        raise ValueError(
            f"Invalid DOI: {value!r}, must be a string or HttpUrl, "
            f"not {type(value)}"
        )

    # Only the two doi.org resolver hosts are accepted.
    accepted_prefixes = ("https://doi.org/", "https://dx.doi.org/")
    if not value.unicode_string().startswith(accepted_prefixes):
        raise ValueError(f"Invalid DOI: {value}")

    return value
80# def validate_comments(comments: str | Comment | None) -> Comment | None:
81# """
82# Validate comments string.
84# Parameters
85# ----------
86# comments : str | None
87# The comments to validate.
89# Returns
90# -------
91# str | None
92# The validated comments string or None if empty.
93# """
94# if isinstance(comments, str):
95# return Comment(value=comments) # type: ignore
96# return comments
def validate_header(header, attribute=False):
    """
    Validate that a header holds exactly the required keys.

    The header is compared, ignoring order, against ``REQUIRED_KEYS``.  When
    ``attribute`` is False the ``'attribute'`` key is left out of the
    comparison.

    Parameters
    ----------
    header : list
        list of header names
    attribute : bool, optional
        include 'attribute' in the required keys or not, by default False

    Returns
    -------
    list
        the input header, unchanged

    Raises
    ------
    MTValidatorError
        If header is not a list or its keys do not match the required keys.
    """
    if not isinstance(header, list):
        # f-string prefix was missing, so the offending type was never shown
        msg = f"input header must be a list, not {type(header)}"
        raise MTValidatorError(msg)

    if attribute:
        if sorted(header) != sorted(REQUIRED_KEYS):
            msg = (
                f"Keys is not correct, must include {REQUIRED_KEYS}"
                + f". Currently has {header}"
            )
            raise MTValidatorError(msg)
    else:
        required_keys = [key for key in REQUIRED_KEYS if key != "attribute"]
        if sorted(header) != sorted(required_keys):
            missing_keys = [x for x in required_keys if x not in header]
            msg = (
                f"Keys is not correct, must include {required_keys}\n"
                + f". Currently has {header}\n"
                + f"Need to add keys: {missing_keys}"
            )
            raise MTValidatorError(msg)
    return header
def validate_name(name):
    """
    Convert a name to the standard lower case, dot/underscore form.

    The rules applied are:

        * the name must be a string and must not start with a digit
        * '/' is replaced with '.' (category separator)
        * if the name contains upper case letters, an '_' is inserted before
          each upper case letter (camelCase -> camel_case) and the result is
          converted to lower case

    {object}.{name_name}

    Parameters
    ----------
    name : str
        name to validate

    Returns
    -------
    str
        validated name

    Raises
    ------
    MTValidatorError
        If name is not a string or starts with a digit.
    """
    if not isinstance(name, str):
        msg = f"Attribute name must be a string, not {type(name)}"
        raise MTValidatorError(msg)

    if re.match("^[0-9]", name):
        msg = f"Attribute name cannot start with a number, {name}"
        raise MTValidatorError(msg)

    name = name.replace("/", ".")

    if re.search("[A-Z]", name):
        # Split in front of every upper case letter and re-join with '_'.
        name = "_".join(re.findall(".[^A-Z]*", name))
        # A split right after a category separator leaves '._'; undo it.
        name = name.replace("._", ".")
        name = name.lower()

    # The original built a "converted to ..." message here that was never
    # formatted or emitted; that dead assignment has been removed.
    return name
193def validate_station_name(name: str | int | float) -> str:
194 """
195 validate station name to conform to general standards
197 - must be a string
198 - must only contain letters, numbers, and underscores
200 Parameters
201 ----------
202 name : str | int | float
203 The station name to validate
205 Returns
206 -------
207 str
208 The validated station name
210 Raises
211 ------
212 MTValidatorError
213 If name is not a string or contains invalid characters
214 """
215 name = str(name).strip()
216 original = str(name)
218 # Replace spaces with underscores
219 name = name.replace(" ", "_").replace("-", "_")
221 # Test if string contains only letters, numbers, and underscores
222 if not re.match(r"^[a-zA-Z0-9_]+$", name):
223 msg = f"Station name '{original}' contains invalid characters. Only letters, numbers, and underscores are allowed."
224 raise MTValidatorError(msg)
226 return name
def validate_attribute(name):
    """
    Convert an attribute name to the standard lower case form.

    The rules applied are:

        * the name must be a string and must not start with a digit
        * '/' is replaced with '.' (category separator)
        * if the name contains upper case letters, an '_' is inserted before
          each upper case letter (camelCase -> camel_case) and the result is
          converted to lower case

    {object}.{name_name}

    Parameters
    ----------
    name : str
        attribute name to validate

    Returns
    -------
    str
        validated attribute name

    Raises
    ------
    MTValidatorError
        If name is not a string or starts with a digit.
    """
    if not isinstance(name, str):
        raise MTValidatorError(f"Attribute name must be a string, not {type(name)}")

    if re.match("^[0-9]", name):
        raise MTValidatorError(f"Attribute name cannot start with a number, {name}")

    validated = name.replace("/", ".")

    if re.search("[A-Z]", validated):
        # Break the name in front of each upper case letter, join the pieces
        # with '_', then drop the '_' that lands directly after a '.'.
        pieces = re.findall(".[^A-Z]*", validated)
        validated = "_".join(pieces).replace("._", ".").lower()

    # The original built a "converted to ..." message here that was never
    # formatted or emitted; that dead assignment has been removed.
    return validated
def validate_required(value):
    """
    Validate the 'required' flag, which must be True or False.

    Booleans are returned unchanged.  The strings "true" and "false" are
    accepted in any letter case.

    Parameters
    ----------
    value : str or bool
        required value

    Returns
    -------
    bool
        validated required value

    Raises
    ------
    MTValidatorError
        If value is not a bool or a "true"/"false" string.
    """
    if isinstance(value, bool):
        return value

    if not isinstance(value, str):
        # f-string prefix was missing, so the placeholder was printed as-is
        msg = f"Required value must be True or False, not {type(value)}"
        raise MTValidatorError(msg)

    lowered = value.lower()
    if lowered == "false":
        return False
    if lowered == "true":
        return True

    msg = f"Required value must be True or False, not {value}"
    raise MTValidatorError(msg)
def validate_type(value):
    """
    Map a Python type or a type description to a standard type name.

    The input is matched by substring, case-insensitively and in this order:
    "int" -> "integer", "float" -> "float", "str" -> "string",
    "bool" -> "boolean", "list" or "array" -> "list", "dict" -> "dict",
    "object" -> "object".  A string containing "h5py_reference" is returned
    as given.

    Parameters
    ----------
    value : type or str
        required type

    Returns
    -------
    str
        validated type

    Raises
    ------
    MTValidatorError
        If value is neither a type nor a string, or no known type name is
        found in it.
    """
    if isinstance(value, type):
        value = "{0}".format(value).replace("<class", "").replace(">", "")

    if not isinstance(value, str):
        msg = (
            "'type' must be type [ int | float "
            + f"| str | bool | list | dict | object ] or string. Not {value}"
        )
        raise MTValidatorError(msg)

    value = value.replace("<class", "").replace(">", "")
    lowered = value.lower()

    # First match wins, so the order of this table is significant.
    keyword_table = (
        (("int",), "integer"),
        (("float",), "float"),
        (("str",), "string"),
        (("bool",), "boolean"),
        (("list", "array"), "list"),
        (("dict",), "dict"),
        (("object",), "object"),
    )
    for keywords, type_name in keyword_table:
        if any(keyword in lowered for keyword in keywords):
            return type_name

    if "h5py_reference" in lowered:
        return value

    msg = (
        "'type' must be type [ int | float "
        + f"| str | bool | list | dict | object ]. Not {value}"
    )
    raise MTValidatorError(msg)
def validate_units(value):
    """
    Validate units

    ..todo:: make a list of acceptable unit names

    Units are lower-cased.  ``None`` and the strings "none", "empty" and ""
    (in any letter case) all mean "no units".

    Parameters
    ----------
    value : str
        unit value to be validated

    Returns
    -------
    str
        lower case units, or None when no units are given

    Raises
    ------
    MTValidatorError
        If value is neither a string nor None.
    """
    if value is None:
        return None

    if not isinstance(value, str):
        msg = f"'units' must be a string or None, not {type(value)}"
        raise MTValidatorError(msg)

    normalized = value.lower()
    return None if normalized in ("none", "empty", "") else normalized
def validate_style(value):
    """
    Validate string style

    ..todo:: make list of accepted style formats

    ``None`` falls back to the generic "name" style; any other value must be
    a string whose lower case form is in ``ACCEPTED_STYLES``.

    Parameters
    ----------
    value : str
        style to be validated

    Returns
    -------
    str
        lower case validated style

    Raises
    ------
    MTValidatorError
        If value is not a string or is not an accepted style.
    """
    if value is None:
        return "name"

    if isinstance(value, str):
        style = value.lower()
        if style in ACCEPTED_STYLES:
            return style
        msg = f"style {value} unknown, must be in {ACCEPTED_STYLES}"
    else:
        msg = f"'value' must be a string. Not {type(value)}"

    raise MTValidatorError(msg)
def validate_description(description):
    """
    Check that the description is a string.

    Parameters
    ----------
    description : str
        detailed description of an attribute

    Returns
    -------
    str
        the description, unchanged

    Raises
    ------
    MTValidatorError
        If description is not a string.
    """
    if isinstance(description, str):
        return description

    msg = f"Description must be a string, not {type(description)}"
    raise MTValidatorError(msg)
def validate_options(options):
    """
    Turn options into a list of strings.

    Parameters
    ----------
    options : str, list, tuple, float, int or bool
        A string is stripped of square brackets and split on '|', dropping
        entries equal to "none" (any letter case) or "".  A list or tuple has
        each entry converted with ``str``.  A single number or bool becomes a
        one-element list.

    Returns
    -------
    list of str
        the options as strings

    Raises
    ------
    MTValidatorError
        If options is of any other type (including None).
    """
    if isinstance(options, str):
        entries = options.replace("[", "").replace("]", "").strip().split("|")
        # NOTE(review): the none/empty test runs on the un-stripped entry, so
        # " none " or " " survive the filter (and are then stripped).  Kept
        # as-is to preserve existing behavior; confirm whether intended.
        return [
            entry.strip() for entry in entries if entry.lower() not in ("none", "")
        ]

    if isinstance(options, (list, tuple)):
        return [str(option) for option in options]

    if isinstance(options, (float, int, bool)):
        return [f"{options}"]

    # f-string prefix was missing, so the offending type was never shown
    msg = f"Option type not understood {type(options)}"
    raise MTValidatorError(msg)
def validate_alias(alias):
    """
    Turn alias names into a list of strings.

    Parameters
    ----------
    alias : str, list, tuple, float, int or bool
        A string is stripped of square brackets and split on '|', dropping
        entries equal to "none" (any letter case) or "".  A list or tuple has
        each entry converted with ``str``.  A single number or bool becomes a
        one-element list.

    Returns
    -------
    list of str
        the alias names as strings

    Raises
    ------
    MTValidatorError
        If alias is of any other type.
    """
    if isinstance(alias, (list, tuple)):
        return [str(entry) for entry in alias]

    if isinstance(alias, (float, int, bool)):
        return [f"{alias}"]

    if not isinstance(alias, str):
        msg = f"Alias type not understood {alias}"
        raise MTValidatorError(msg)

    pieces = alias.replace("[", "").replace("]", "").strip().split("|")
    # the none/empty test is applied before the piece is stripped
    return [piece.strip() for piece in pieces if piece.lower() not in ["none", ""]]
def validate_example(example):
    """
    Make sure the example is a string.

    Parameters
    ----------
    example : any
        example value; strings are returned unchanged

    Returns
    -------
    str
        the example, formatted as a string if it was not one already
    """
    return example if isinstance(example, str) else "{0}".format(example)
def validate_default(value_dict):
    """
    Work out the default value for an attribute description.

    Parameters
    ----------
    value_dict : dict
        Attribute description.  The keys 'required', 'default', 'style' and
        'type' are read; 'options' is read for controlled styles.

    Returns
    -------
    object
        * not required: the epoch time string for date/time styles,
          otherwise None
        * required with a default: the default passed through
          ``validate_value_type``
        * required without a default: a placeholder chosen from the style
          and type -- ``[]`` for list styles, the epoch time string for
          date/time styles, the first option (or None if "other" is an
          option or there are no options) for controlled styles, ``0`` for
          numbers, ``"none"`` for strings, ``False`` for booleans, and None
          for anything else
    """
    epoch = "1980-01-01T00:00:00+00:00"
    style = value_dict["style"]
    is_time_style = "date" in style or "time" in style

    if not value_dict["required"]:
        return epoch if is_time_style else None

    default = value_dict["default"]
    if default is not None:
        return validate_value_type(default, value_dict["type"], style)

    if "list" in style:
        return []
    if is_time_style:
        return epoch
    if "controlled" in style:
        options = value_dict["options"]
        # No options at all used to raise IndexError; treat it like "other".
        if not options or "other" in options:
            return None
        return options[0]

    v_type = value_dict["type"]
    if v_type in ("integer", "float", int, float):
        return 0
    if v_type in ("string", str):
        return "none"
    # validate_type() emits "boolean"; "bool" is kept for un-normalized input
    if v_type in ("boolean", "bool", bool):
        return False
    # h5py references and every other type (list, dict, object, ...) have no
    # sensible placeholder.  Previously all but h5py_reference fell through
    # and raised UnboundLocalError.
    return None
def _type_mismatch_message(value, v_type):
    """Build the message used when value cannot be converted to v_type."""
    return f"value={value} must be {v_type} not {type(value)}"


def validate_value_type(value, v_type, style=None):
    """
    Convert a value to the type given by the standards.

    Parameters
    ----------
    value : object
        value to validate
    v_type : type or str or None
        required type, either a Python type or a type name understood by
        ``validate_type``.  ``"h5py_reference"`` and ``None`` return the
        value unchanged (``None`` also logs a warning).
    style : str, optional
        attribute style, by default None.  If it contains "list" and the
        value is a string, the string is split into a list first.

    Returns
    -------
    object
        the value converted to ``v_type``; None for empty values (None,
        "None", "none", "unknown"); a list of converted entries when the
        value is iterable

    Raises
    ------
    MTSchemaError
        If the value cannot be converted to the required type.
    """
    # metadata objects are skipped, their components are validated separately
    if "metadata" in str(type(value)):
        return value

    # Empty scalar values become None.  Sequences are excluded because
    # comparing an array against the list below is ambiguous.
    if not isinstance(value, (list, tuple, np.ndarray)):
        if value in [None, "None", "none", "unknown"]:
            return None

    # hack to get around h5py reference types, in the future will need
    # a more robust test.
    if v_type == "h5py_reference":
        return value

    # return value if the value type is not defined.
    if v_type is None:
        msg = (
            "standards data type is unknown, if you want to "
            + "propogate this attribute using to_dict, to_json or "
            + "to_series, you need to add attribute description using "
            + "class function add_base_attribute."
        )
        logger.warning(msg)
        return value

    # A type name is translated to the matching Python type.  Anything else
    # is used directly as the second argument of isinstance() below.
    if isinstance(v_type, str):
        type_dict = {
            "string": str,
            "integer": int,
            "float": float,
            "boolean": bool,
            "list": list,
            "dict": dict,
            "object": object,
        }
        v_type = type_dict[validate_type(v_type)]

    # For a list style split the string on the first delimiter found:
    # comma, then whitespace, then semicolon.
    if style and "list" in style and isinstance(value, str):
        delimiter = " "
        if value.count(",") > 0:
            delimiter = ","
        elif value.strip().count(" ") > 0:
            delimiter = " "
        elif value.count(";") > 0:
            delimiter = ";"
        value = value.replace("[", "").replace("]", "").split(delimiter)
        value = [ss.strip() for ss in value]

    if isinstance(value, v_type):
        return value

    # if the value is a string, convert to appropriate type
    if isinstance(value, str):
        if v_type is int:
            if value.lower() in ["none", "nan", ""]:
                return None
            try:
                return int(value)
            except ValueError as error:
                raise MTSchemaError(_type_mismatch_message(value, v_type)) from error
        if v_type is float:
            try:
                return float(value)
            except ValueError as error:
                raise MTSchemaError(_type_mismatch_message(value, v_type)) from error
        if v_type is bool:
            if value.lower() in ["false", "0"]:
                return False
            if value.lower() in ["true", "1"]:
                return True
            raise MTSchemaError(_type_mismatch_message(value, v_type))
        # NOTE(review): a string with any other target type (e.g. list
        # without a list style, or dict) has always produced None here
        # rather than an error.  Kept for compatibility; confirm intent.
        return None

    # integers, including every numpy integer width and signedness
    if isinstance(value, (int, np.integer)):
        if v_type is float:
            return float(value)
        if v_type is str:
            # int() first so large values are not rounded through a float
            return f"{int(value)}"
        return int(value)

    # floats, including every numpy float width
    if isinstance(value, (float, np.floating)):
        if v_type is int:
            return int(value)
        if v_type is str:
            return f"{value}"
        return float(value)

    # convert each entry of an iterable to the given type
    if isinstance(value, Iterable):
        if v_type is str:
            if isinstance(value, np.ndarray):
                value = value.astype(np.str_)
            value = [f"{v}".replace("'", "").replace('"', "") for v in value]
        elif v_type is int:
            value = [int(float(v)) for v in value]
        elif v_type is float:
            value = [float(v) for v in value]
        elif v_type is bool:
            # entries that are not a recognized true/false spelling are dropped
            value_list = []
            for v in value:
                if v in [True, "true", "True", "TRUE", 1, "1"]:
                    value_list.append(True)
                elif v in [False, "false", "False", "FALSE", 0, "0"]:
                    value_list.append(False)
            value = value_list
        return value

    if isinstance(value, np.bool_):
        return bool(value)

    raise MTSchemaError(_type_mismatch_message(value, v_type))
def validate_value_dict(value_dict):
    """
    Validate an attribute description dictionary in place.

    The keys are checked with ``validate_header`` (without the 'attribute'
    key).  Every entry except 'default' is then passed through the module
    level function ``validate_<key>``, and finally the default is resolved
    with ``validate_default`` once all other entries are valid.

    :param value_dict: attribute description
    :type value_dict: dict
    :return: the same dictionary with validated values
    :rtype: dict
    :raises MTValidatorError: if the input is not a dictionary
    :raises KeyError: if no ``validate_<key>`` function exists for a key

    """
    if not isinstance(value_dict, dict):
        # objects of the logger's type are passed through untouched
        if isinstance(value_dict, type(logger)):
            return value_dict
        msg = f"Input must be a dictionary, not {type(value_dict)}"
        raise MTValidatorError(msg)

    header = validate_header(list(value_dict.keys()))
    module = sys.modules[__name__]

    # loop over validating functions in this module
    for key in header:
        if key == "default":
            continue
        # A missing validator surfaces as AttributeError from getattr, not
        # KeyError; translate it to the KeyError this function reports.
        try:
            validator = getattr(module, f"validate_{key}")
        except AttributeError as error:
            raise KeyError(
                f"Could not find {key} for validator {__name__}"
            ) from error
        value_dict[key] = validator(value_dict[key])

    # need to validate the default value after all other keys have been validated
    value_dict["default"] = validate_default(value_dict)

    return value_dict