Coverage for src/dataknobs_xization/authorities.py: 37%
177 statements
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-18 17:41 -0700
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-18 17:41 -0700
1"""Authority-based annotation processing and field grouping.
3Provides classes for managing authority-based annotations, field groups,
4and derived annotation columns for structured text extraction.
5"""
7import re
8from abc import ABC, abstractmethod
9from collections.abc import Callable
10from typing import Any, Dict, List, Set, Union
12import pandas as pd
14import dataknobs_xization.annotations as dk_annots
16# Key annotation column name constants
17KEY_AUTH_ID_COL = "auth_id"
class DerivedFieldGroups(dk_annots.DerivedAnnotationColumns):
    """Defines derived column types:
    * "field_type" -- The column holding the type of field of an annotation row
    * "field_group" -- The column holding the group number(s) of the field
    * "field_record" -- The column holding record number(s) of the field
    """

    def __init__(
        self,
        field_type_suffix: str = "_field",
        field_group_suffix: str = "_num",
        field_record_suffix: str = "_recsnum",
    ):
        """Add derived column types/names: Given an annotation row,
          * field_type(row) == f'{row[ann_type_col]}_field'
          * field_group(row) == f'{row[ann_type_col]}_num'
          * field_record(row) == f'{row[ann_type_col]}_recsnum'

        Where:
          * A field_type column holds annotation "sub"- type values, or fields
          * A field_group column identifies groups of annotation fields
          * A field_record column identifies groups of annotation field groups

        Args:
            field_type_suffix: The field_type col name suffix (if not _field).
            field_group_suffix: The field_group col name suffix (if not _num).
            field_record_suffix: field_record colname sfx (if not _recsnum).
        """
        self.field_type_suffix = field_type_suffix
        self.field_group_suffix = field_group_suffix
        self.field_record_suffix = field_record_suffix

    def get_col_value(
        self,
        metadata: dk_annots.AnnotationsMetaData,
        col_type: str,
        row: pd.Series,
        missing: str = None,
    ) -> str:
        """Get the value of the column in the given row derived from col_type,
        where col_type is one of:
          * "field_type" == f"{field}_field"
          * "field_group" == f"{field}_num"
          * "field_record" == f"{field}_recsnum"

        And "field" is the value of the metadata's "ann_type" column in the row.

        Args:
            metadata: The AnnotationsMetaData.
            col_type: The type of column value to derive.
            row: A row from which to get the value.
            missing: The value to return for unknown or missing column.

        Returns:
            The row value, or the missing value when the row has no ann_type
            column, the ann_type value is None, col_type is not one of the
            derived types, or the derived column is absent from the row.
        """
        if metadata.ann_type_col not in row.index:
            return missing
        field = row[metadata.ann_type_col]
        if field is None:
            return missing

        # Dispatch on the derived column type. An unrecognized col_type yields
        # no column name (and therefore the "missing" value) instead of
        # falling through to an unbound local variable.
        col_name_builders = {
            "field_type": self.get_field_type_col,
            "field_group": self.get_field_group_col,
            "field_record": self.get_field_record_col,
        }
        builder = col_name_builders.get(col_type)
        col_name = builder(field) if builder is not None else None
        if col_name is not None and col_name in row.index:
            return row[col_name]
        return missing

    def unpack_field(self, field_value: str) -> str:
        """Given a field in any of its derivatives (like field type, field group
        or field record,) unpack and return the basic field value itself.

        Only one trailing suffix is removed. The suffixes are tested in the
        order record, group, type and the first one that matches wins.

        Args:
            field_value: A field name or a derived column name.

        Returns:
            The field name with the matching derived suffix (if any) removed.
        """
        suffixes = (
            self.field_record_suffix,
            self.field_group_suffix,
            self.field_type_suffix,
        )
        for suffix in suffixes:
            if field_value.endswith(suffix):
                # Slice rather than str.replace so that occurrences of the
                # suffix text elsewhere in the field name are left intact.
                return field_value[: len(field_value) - len(suffix)]
        return field_value

    def get_field_name(self, field_value: str) -> str:
        """Given a field name or field col name, e.g., an annotation type col's
        value (the field name); or a field type, group, or record column name,
        get the field name.
        """
        return self.unpack_field(field_value)

    def get_field_type_col(self, field_value: str) -> str:
        """Given a field name or field col name, e.g., an annotation type col's
        value; or a field type, group, or record column name, get the name of
        the derived field type column.
        """
        field = self.unpack_field(field_value)
        return f"{field}{self.field_type_suffix}"

    def get_field_group_col(self, field_value: str) -> str:
        """Given a field name or field col name, e.g., an annotation type col's
        value; or a field type, group, or record, get the name of the derived
        field group column.
        """
        field = self.unpack_field(field_value)
        return f"{field}{self.field_group_suffix}"

    def get_field_record_col(self, field_value: str) -> str:
        """Given a field name or field col name, e.g., an annotation type col's
        value; or a field type, group, or record, get the name of the derived
        field record column.
        """
        field = self.unpack_field(field_value)
        return f"{field}{self.field_record_suffix}"
class AuthorityAnnotationsMetaData(dk_annots.AnnotationsMetaData):
    """Annotations metadata that additionally carries an 'auth_id_col' among
    the standard (key) annotation columns (attributes).
    """

    def __init__(
        self,
        start_pos_col: str = dk_annots.KEY_START_POS_COL,
        end_pos_col: str = dk_annots.KEY_END_POS_COL,
        text_col: str = dk_annots.KEY_TEXT_COL,
        ann_type_col: str = dk_annots.KEY_ANN_TYPE_COL,
        auth_id_col: str = KEY_AUTH_ID_COL,
        sort_fields: List[str] = (dk_annots.KEY_START_POS_COL, dk_annots.KEY_END_POS_COL),
        sort_fields_ascending: List[bool] = (True, False),
        **kwargs: Any,
    ):
        """Set up the key (and any extra) column names.

        Key column types:
          * start_pos
          * end_pos
          * text
          * ann_type
          * auth_id

        Note:
            Actual table columns can be named arbitrarily, BUT interactions
            through annotations classes and interfaces relating to the "key"
            columns must use the key column constants.

        Args:
            start_pos_col: Col name for the token starting position.
            end_pos_col: Col name for the token ending position.
            text_col: Col name for the token text.
            ann_type_col: Col name for the annotation types.
            auth_id_col: Col name for the authority value ID.
            sort_fields: The col types relevant for sorting annotation rows.
            sort_fields_ascending: To specify sort order of sort_fields.
            **kwargs: More column types mapped to column names.
        """
        key_columns = {
            "start_pos_col": start_pos_col,
            "end_pos_col": end_pos_col,
            "text_col": text_col,
            "ann_type_col": ann_type_col,
        }
        sorting = {
            "sort_fields": sort_fields,
            "sort_fields_ascending": sort_fields_ascending,
        }
        # The auth_id column is handed to the base class as an extra column
        # type, ahead of any caller-supplied extras.
        super().__init__(**key_columns, **sorting, auth_id=auth_id_col, **kwargs)

    @property
    def auth_id_col(self) -> str:
        """The name of the column holding the auth_id."""
        column_name = self.data[KEY_AUTH_ID_COL]
        return column_name
class AuthorityAnnotationsBuilder(dk_annots.AnnotationsBuilder):
    """An AnnotationsBuilder whose rows also carry the 'auth_id' column."""

    def __init__(
        self,
        metadata: AuthorityAnnotationsMetaData = None,
        data_defaults: Dict[str, Any] = None,
    ):
        """Set up the builder.

        Args:
            metadata: The authority annotations metadata; a default
                AuthorityAnnotationsMetaData is created when None.
            data_defaults: Dict[ann_colname, default_value] with default
                values for annotation columns.
        """
        if metadata is None:
            metadata = AuthorityAnnotationsMetaData()
        super().__init__(metadata, data_defaults)

    def build_annotation_row(
        self, start_pos: int, end_pos: int, text: str, ann_type: str, auth_id: str, **kwargs: Any
    ) -> Dict[str, Any]:
        """Build an annotation row from the mandatory key values plus the
        remaining keyword arguments.

        For those kwargs whose names match metadata column names, override the
        data_defaults and add remaining data_default attributes.

        Args:
            start_pos: The token start position.
            end_pos: The token end position.
            text: The token text.
            ann_type: The annotation type.
            auth_id: The authority ID for the row.
            **kwargs: Additional keyword arguments.

        Returns:
            The result row dictionary.
        """
        # Key values are stored under the column names configured in the
        # metadata.
        key_values = {}
        key_values[self.metadata.start_pos_col] = start_pos
        key_values[self.metadata.end_pos_col] = end_pos
        key_values[self.metadata.text_col] = text
        key_values[self.metadata.ann_type_col] = ann_type
        key_values[self.metadata.auth_id_col] = auth_id
        return self.do_build_row(key_values, **kwargs)
class AuthorityData:
    """Thin wrapper pairing an authority's dataframe with its name."""

    def __init__(self, df: pd.DataFrame, name: str):
        self.name = name
        self._df = df

    @property
    def df(self) -> pd.DataFrame:
        """The dataframe holding the authority data."""
        return self._df

    def lookup_values(self, value: Any, is_id: bool = False) -> pd.DataFrame:
        """Find the authority row(s) matching a value or a value id.

        Args:
            value: A value or value_id for this authority.
            is_id: True if value is an ID.

        Returns:
            The applicable authority dataframe rows.
        """
        if is_id:
            # IDs are matched against the dataframe's index.
            candidates = self.df.index
        else:
            # Values are matched against the column named after the authority.
            candidates = self.df[self.name]
        matches = candidates == value
        return self.df[matches]
class Authority(dk_annots.Annotator):
    """A class for managing and defining tabular authoritative data for e.g.,
    taxonomies, etc., and using them to annotate instances within text.
    """

    def __init__(
        self,
        name: str,
        auth_anns_builder: AuthorityAnnotationsBuilder = None,
        authdata: AuthorityData = None,
        field_groups: DerivedFieldGroups = None,
        anns_validator: Callable[["Authority", List[Dict[str, Any]]], bool] = None,
        parent_auth: "Authority" = None,
    ):
        """Initialize with this authority's metadata.

        Args:
            name: This authority's entity name.
            auth_anns_builder: The authority annotations row builder to use
                for building annotation rows. A default
                AuthorityAnnotationsBuilder is created when None.
            authdata: The authority data.
            field_groups: The derived field groups to use. A default
                DerivedFieldGroups is created when None.
            anns_validator: fn(auth, anns_dict_list) that returns True if
                the list of annotation row dicts are valid to be added as
                annotations for a single match or "entity".
            parent_auth: This authority's parent authority (if any).
        """
        super().__init__(name)
        self.anns_builder = (
            auth_anns_builder if auth_anns_builder is not None else AuthorityAnnotationsBuilder()
        )
        self.authdata = authdata
        self.field_groups = field_groups if field_groups is not None else DerivedFieldGroups()
        self.anns_validator = anns_validator
        self._parent = parent_auth

    @property
    def metadata(self) -> AuthorityAnnotationsMetaData:
        """Get the meta-data"""
        return self.anns_builder.metadata

    @property
    def parent(self) -> "Authority":
        """Get this authority's parent, or None."""
        return self._parent

    @abstractmethod
    def has_value(self, value: Any) -> bool:
        """Determine whether the given value is in this authority.

        Args:
            value: A possible authority value.

        Returns:
            True if the value is a valid entity value.
        """
        raise NotImplementedError

    def annotate_input(
        self,
        text_obj: Union[dk_annots.AnnotatedText, str],
        **kwargs: Any,
    ) -> dk_annots.Annotations:
        """Find and annotate this authority's entities in the document text
        as dictionaries like:
        [
            {
                'input_id': <id>,
                'start_pos': <start_char_pos>,
                'end_pos': <end_char_pos>,
                'entity_text': <entity_text>,
                'ann_type': <authority_name>,
                '<auth_id>': <auth_value_id_or_canonical_form>,
                'confidence': <confidence_if_available>,
            },
        ]

        Args:
            text_obj: The text object or string to process.
            **kwargs: Additional keyword arguments (currently unused here).

        Returns:
            An Annotations instance, or None when there is nothing to
            annotate (text_obj is None or a blank string).
        """
        if text_obj is None:
            return None
        if isinstance(text_obj, str):
            if not text_obj.strip():
                # Blank text has nothing to annotate; never hand a raw str to
                # add_annotations, which expects an AnnotatedText.
                return None
            text_obj = dk_annots.AnnotatedText(
                text_obj,
                annots_metadata=self.metadata,
            )
        return self.add_annotations(text_obj)

    @abstractmethod
    def add_annotations(
        self,
        text_obj: dk_annots.AnnotatedText,
    ) -> dk_annots.Annotations:
        """Method to do the work of finding, validating, and adding annotations.

        Args:
            text_obj: The annotated text object to process and add annotations.

        Returns:
            The added Annotations.
        """
        raise NotImplementedError

    def validate_ann_dicts(self, ann_dicts: List[Dict[str, Any]]) -> bool:
        """The annotation row dictionaries are valid if:
          * They are non-empty
          * and
             * either there is no annotations validator
             * or they are valid according to the validator

        Args:
            ann_dicts: Annotation dictionaries.

        Returns:
            True if valid.
        """
        return len(ann_dicts) > 0 and (
            self.anns_validator is None or self.anns_validator(self, ann_dicts)
        )

    def compose(
        self,
        annotations: dk_annots.Annotations,
    ) -> dk_annots.Annotations:
        """Compose annotations into groups.

        This base implementation returns the annotations unchanged.

        Args:
            annotations: The annotations.

        Returns:
            Composed annotations.
        """
        return annotations

    def build_annotation(
        self,
        start_pos: int = None,
        end_pos: int = None,
        entity_text: str = None,
        auth_value_id: Any = None,
        conf: float = 1.0,
        **kwargs,
    ) -> Dict[str, Any]:
        """Build an annotation row dict with the given components.

        The row's annotation type is this authority's name and the confidence
        is passed to the row builder as the "auth_valconf" keyword.

        Args:
            start_pos: The entity's start position.
            end_pos: The entity's end position.
            entity_text: The entity's text.
            auth_value_id: The authority value ID for the row.
            conf: The confidence for the row.
            **kwargs: Additional keyword arguments passed to the row builder.

        Returns:
            The annotation row dictionary.
        """
        return self.anns_builder.build_annotation_row(
            start_pos, end_pos, entity_text, self.name, auth_value_id, auth_valconf=conf, **kwargs
        )
class AnnotationsValidator(ABC):
    """Base class offering helpers for validating proposed annotation rows."""

    def __call__(
        self,
        auth: Authority,
        ann_row_dicts: List[Dict[str, Any]],
    ) -> bool:
        """Make instances usable as the anns_validator function of an
        Authority.

        Args:
            auth: The authority proposing annotations.
            ann_row_dicts: The proposed annotations.

        Returns:
            True if the annotations are valid; otherwise, False.
        """
        proposed = AnnotationsValidator.AuthAnnotations(auth, ann_row_dicts)
        return self.validate_annotation_rows(proposed)

    @abstractmethod
    def validate_annotation_rows(
        self,
        auth_annotations: "AnnotationsValidator.AuthAnnotations",
    ) -> bool:
        """Decide whether the proposed authority annotation rows are valid.

        Args:
            auth_annotations: The AuthAnnotations instance with the
                proposed data.

        Returns:
            True if valid; False if not.
        """
        raise NotImplementedError

    class AuthAnnotations:
        """Convenience wrapper around an authority's proposed annotation rows."""

        def __init__(self, auth: Authority, ann_row_dicts: List[Dict[str, Any]]):
            self.auth = auth
            self.ann_row_dicts = ann_row_dicts
            # Lazily built caches (see the matching properties).
            self._row_accessor = None  # AnnotationsRowAccessor
            self._anns = None  # Annotations
            self._atts = None  # Dict[str, str]

        @property
        def row_accessor(self) -> dk_annots.AnnotationsRowAccessor:
            """The (lazily created) row accessor for these annotations."""
            if self._row_accessor is not None:
                return self._row_accessor
            self._row_accessor = dk_annots.AnnotationsRowAccessor(
                self.auth.metadata, derived_cols=self.auth.field_groups
            )
            return self._row_accessor

        @property
        def anns(self) -> dk_annots.Annotations:
            """The annotation rows as a (lazily built) annotations object."""
            if self._anns is None:
                collected = dk_annots.Annotations(self.auth.metadata)
                self._anns = collected
                for ann_row in self.ann_row_dicts:
                    collected.add_dict(ann_row)
            return self._anns

        @property
        def df(self) -> pd.DataFrame:
            """The dataframe of the annotations."""
            return self.anns.df

        def get_field_type(self, row: pd.Series) -> str:
            """Get the entity field type value"""
            accessor = self.row_accessor
            return accessor.get_col_value("field_type", row, None)

        def get_text(self, row: pd.Series) -> str:
            """Get the entity text from the row"""
            text_col = self.auth.metadata.text_col
            return self.row_accessor.get_col_value(text_col, row, None)

        @property
        def attributes(self) -> Dict[str, str]:
            """The annotation entity attributes, mapping each row's field type
            to its text (built once, then cached).
            """
            if self._atts is None:
                collected = {}
                for _, row in self.df.iterrows():
                    field_type = self.get_field_type(row)
                    collected[field_type] = self.get_text(row)
                self._atts = collected
            return self._atts

        def colval(self, col_name, row) -> Any:
            """Get the column's value from the given row"""
            accessor = self.row_accessor
            return accessor.get_col_value(col_name, row)
class AuthorityFactory(ABC):
    """Abstract factory whose implementations construct authorities."""

    @abstractmethod
    def build_authority(
        self,
        name: str,
        auth_anns_builder: AuthorityAnnotationsBuilder,
        authdata: AuthorityData,
        parent_auth: Authority = None,
    ) -> Authority:
        """Construct an authority from a name and its data.

        Implementations must override this method; the base version always
        raises NotImplementedError.

        Args:
            name: The authority name.
            auth_anns_builder: The authority annotations row builder to use
                for building annotation rows.
            authdata: The authority data.
            parent_auth: The parent authority.

        Returns:
            The authority.
        """
        raise NotImplementedError
class LexicalAuthority(Authority):
    """An authority over named entities, addressed by ID, each with
    associated values and variations.
    """

    def __init__(
        self,
        name: str,
        auth_anns_builder: AuthorityAnnotationsBuilder = None,
        authdata: AuthorityData = None,
        field_groups: DerivedFieldGroups = None,
        anns_validator: Callable[["Authority", Dict[str, Any]], bool] = None,
        parent_auth: "Authority" = None,
    ):
        """Set up this authority; every argument is forwarded unchanged to
        Authority.

        Args:
            name: This authority's entity name.
            auth_anns_builder: The authority annotations row builder to use
                for building annotation rows.
            authdata: The authority data.
            field_groups: The derived field groups to use.
            anns_validator: fn(auth, anns_dict_list) that returns True if
                the list of annotation row dicts are valid to be added as
                annotations for a single match or "entity".
            parent_auth: This authority's parent authority (if any).
        """
        base_options = {
            "auth_anns_builder": auth_anns_builder,
            "authdata": authdata,
            "field_groups": field_groups,
            "anns_validator": anns_validator,
            "parent_auth": parent_auth,
        }
        super().__init__(name, **base_options)

    @abstractmethod
    def get_value_ids(self, value: Any) -> Set[Any]:
        """Collect every ID associated with the given value.

        There is typically a single ID for any value, but a set is returned
        to allow for inherent ambiguities in the authority.

        Args:
            value: An authority value.

        Returns:
            The associated IDs or an empty set if the value is not valid.
        """
        raise NotImplementedError

    @abstractmethod
    def get_values_by_id(self, value_id: Any) -> Set[Any]:
        """Collect every value associated with the given value ID.

        There is typically a single value for an ID, but a set is returned
        to allow for inherent ambiguities in the authority.

        Args:
            value_id: An authority value ID.

        Returns:
            The associated values or an empty set if the value is not valid.
        """
        raise NotImplementedError

    @abstractmethod
    def get_id_by_variation(self, variation: str) -> Set[str]:
        """Look up the IDs of the value(s) associated with a variation.

        Args:
            variation: Variation text.

        Returns:
            The possibly empty set of associated value IDS.
        """
        raise NotImplementedError

    @abstractmethod
    def find_variations(
        self,
        variation: str,
        starts_with: bool = False,
        ends_with: bool = False,
        scope: str = "fullmatch",
    ) -> pd.Series:
        """Search for all matches to the given variation.

        Note:
            Only the first true of starts_with, ends_with, and scope will
            be applied. If none of these are true, a full match on the pattern
            is performed.

        Args:
            variation: The text to find; treated as a regular expression
                unless either starts_with or ends_with is True.
            starts_with: When True, find all terms that start with the
                variation text.
            ends_with: When True, find all terms that end with the variation
                text.
            scope: 'fullmatch' (default), 'match', or 'contains' for
                strict, less strict, and least strict matching.

        Returns:
            The matching variations as a pd.Series.
        """
        raise NotImplementedError
class RegexAuthority(Authority):
    """An authority that finds its entities in text by matching a regular
    expression, optionally mapping matched text to a canonical form.
    """

    def __init__(
        self,
        name: str,
        regex: re.Pattern,
        canonical_fn: Callable[[str, str], Any] = None,
        auth_anns_builder: AuthorityAnnotationsBuilder = None,
        authdata: AuthorityData = None,
        field_groups: DerivedFieldGroups = None,
        anns_validator: Callable[[Authority, Dict[str, Any]], bool] = None,
        parent_auth: "Authority" = None,
    ):
        """Initialize with this authority's entity name.

        Note:
            If the regular expression has capturing groups, each group
            that participates in a match will result in a separate entity,
            with the group name if provided in the regular expression as
            ...(?P<group_name>group_regex)...

        Args:
            name: The authority name.
            regex: The compiled regular expression to apply.
            canonical_fn: A function, fn(match_text, group_id), to
                transform input matches to a canonical form as a value_id.
                The group_id is the group name when the pattern has named
                groups, the 1-based group number when it has only unnamed
                groups, and this authority's name (with the full match text)
                when no group participated in the match. Note that the
                canonical form is computed before the anns_validator is
                applied and its value will be found as the value to the
                <auth_id> key.
            auth_anns_builder: The authority annotations row builder to use
                for building annotation rows.
            authdata: The authority data.
            field_groups: The derived field groups to use.
            anns_validator: A validation function for each regex match
                formed as a list of annotation row dictionaries, one row dictionary
                for each matching regex group. If the validator returns False,
                then the annotation rows will be rejected. The entity_text key
                will hold matched text and the <auth_name>_field key will hold
                the group name or number (if there are groups with or without names).
                Rows built from the full match (no participating group) do not
                carry the <auth_name>_field key.
                Note that the validator function takes the regex authority instance
                as its first parameter to provide access to the field_groups, etc.
                The validation_fn signature is: fn(regexAuthority, ann_row_dicts)
                and returns a boolean.
            parent_auth: This authority's parent authority (if any).
        """
        super().__init__(
            name,
            auth_anns_builder=auth_anns_builder,
            authdata=authdata,
            field_groups=field_groups,
            anns_validator=anns_validator,
            parent_auth=parent_auth,
        )
        self.regex = regex
        self.canonical_fn = canonical_fn

    def has_value(self, value: Any) -> re.Match:
        """Determine whether the given value is in this authority.

        The value is converted with str() and matched from its start.

        Args:
            value: A possible authority value.

        Returns:
            None if the value is not a valid entity value; otherwise,
            return the re.Match object.
        """
        return self.regex.match(str(value))

    def add_annotations(
        self,
        text_obj: dk_annots.AnnotatedText,
    ) -> dk_annots.Annotations:
        """Method to do the work of finding, validating, and adding annotations.

        Each regex match yields one candidate list of annotation rows: one row
        per participating capture group, or a single row for the full match
        when no group participated. The list is added only if it passes
        validate_ann_dicts.

        Args:
            text_obj: The annotated text object to process and add annotations.

        Returns:
            The added Annotations.
        """
        # Loop-invariant: the derived "field type" column name for this authority.
        field_type_col = self.field_groups.get_field_type_col(self.name)
        for match in self.regex.finditer(text_obj.text):
            if match.lastindex is None:
                # No capture group participated: annotate the full match.
                ann_dicts = [
                    self.build_annotation(
                        start_pos=match.start(),
                        end_pos=match.end(),
                        entity_text=match.group(),
                        auth_value_id=self.get_canonical_form(match.group(), self.name),
                    )
                ]
            else:
                ann_dicts = self._build_group_annotations(match, field_type_col)
            if self.validate_ann_dicts(ann_dicts):
                # Add non-empty, valid annotation dicts to the result
                text_obj.annotations.add_dicts(ann_dicts)
        return text_obj.annotations

    def _build_group_annotations(
        self, match: re.Match, field_type_col: str
    ) -> List[Dict[str, Any]]:
        """Build one annotation row for each capture group that participated
        in the match.

        When the pattern has named groups only those are considered, labeled
        by group name; otherwise every group is considered, labeled by its
        1-based group number.
        """
        if len(self.regex.groupindex) > 0:  # we have named groups
            labeled_groups = list(self.regex.groupindex.items())
        else:  # we have only numbers for groups
            labeled_groups = [(num, num) for num in range(1, self.regex.groups + 1)]

        ann_dicts = []
        for group_label, group_num in labeled_groups:
            group_text = match.group(group_num)
            if group_text is None:
                # The group did not participate in this match (e.g. the other
                # side of an alternation); it has no text and position -1.
                continue
            ann_dicts.append(
                self.build_annotation(
                    start_pos=match.start(group_num),
                    end_pos=match.end(group_num),
                    entity_text=group_text,
                    auth_value_id=self.get_canonical_form(group_text, group_label),
                    **{field_type_col: group_label},
                )
            )
        return ann_dicts

    def get_canonical_form(self, entity_text: str, entity_type: str) -> Any:
        """Get the canonical form of the matched text.

        Args:
            entity_text: The matched text.
            entity_type: The group name, group number, or authority name
                identifying what was matched.

        Returns:
            canonical_fn(entity_text, entity_type) when a canonical_fn was
            provided; otherwise, the entity_text unchanged.
        """
        if self.canonical_fn is not None:
            entity_text = self.canonical_fn(entity_text, entity_type)
        return entity_text
class AuthoritiesBundle(Authority):
    """An authority whose values are expressed through several bundled
    "authorities", like dictionary-based and/or multiple regular expression
    patterns.
    """

    def __init__(
        self,
        name: str,
        auth_anns_builder: AuthorityAnnotationsBuilder = None,
        authdata: AuthorityData = None,
        field_groups: DerivedFieldGroups = None,
        parent_auth: "Authority" = None,
        anns_validator: Callable[["Authority", Dict[str, Any]], bool] = None,
        auths: List[Authority] = None,
    ):
        """Set up the bundle.

        Args:
            name: This authority's entity name.
            auth_anns_builder: The authority annotations row builder to use
                for building annotation rows.
            authdata: The authority data.
            field_groups: The derived field groups to use.
            anns_validator: fn(auth, anns_dict_list) that returns True if
                the list of annotation row dicts are valid to be added as
                annotations for a single match or "entity".
            parent_auth: This authority's parent authority (if any).
            auths: The authorities to bundle together.
        """
        base_options = {
            "auth_anns_builder": auth_anns_builder,
            "authdata": authdata,
            "field_groups": field_groups,
            "anns_validator": anns_validator,
            "parent_auth": parent_auth,
        }
        super().__init__(name, **base_options)
        if auths is None:
            self.auths = []
        else:
            # Keep a private copy so later add() calls don't touch the
            # caller's list.
            self.auths = auths.copy()

    def add(self, auth: Authority):
        """Append an authority to this bundle.

        Args:
            auth: The authority to add.
        """
        self.auths.append(auth)

    def has_value(self, value: Any) -> bool:
        """Check whether any bundled authority has the given value.

        Args:
            value: A possible authority value.

        Returns:
            True if the value is a valid entity value.
        """
        return any(member.has_value(value) for member in self.auths)

    def add_annotations(
        self,
        text_obj: dk_annots.AnnotatedText,
    ) -> dk_annots.Annotations:
        """Let each bundled authority, in order, annotate the text object.

        Args:
            text_obj: The annotated text object to process and add annotations.

        Returns:
            The added Annotations.
        """
        for member in self.auths:
            member.annotate_input(text_obj)
        return text_obj.annotations