Coverage for src/hdmf/container.py: 94%
837 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-07-10 23:48 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-07-10 23:48 +0000
1import types
2from abc import abstractmethod
3from collections import OrderedDict
4from copy import deepcopy
5from uuid import uuid4
6from warnings import warn
8import h5py
9import numpy as np
10import pandas as pd
12from .data_utils import DataIO, append_data, extend_data
13from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict
14from hdmf.term_set import TermSet
17def _set_exp(cls):
18 """Set a class as being experimental"""
19 cls._experimental = True
22def _exp_warn_msg(cls):
23 """Generate a warning message experimental features"""
24 pfx = cls
25 if isinstance(cls, type): 25 ↛ 27line 25 didn't jump to line 27, because the condition on line 25 was never false
26 pfx = cls.__name__
27 msg = ('%s is experimental -- it may be removed in the future and '
28 'is not guaranteed to maintain backward compatibility') % pfx
29 return msg
class ExternalResourcesManager:
    """
    Mixin that lets a subclass hold a reference to an ExternalResources instance.
    """

    @docval({'name': 'external_resources', 'type': 'ExternalResources',
             'doc': 'The external resources to be used for the container.'},)
    def link_resources(self, **kwargs):
        """
        Attach an ExternalResources instance to this object so terms/references can be auto-added to data.
        """
        resources = kwargs['external_resources']
        self._external_resources = resources

    def get_linked_resources(self):
        """Return the attached ExternalResources instance, or None if none has been linked."""
        return getattr(self, "_external_resources", None)
class AbstractContainer(metaclass=ExtenderMeta):
    """
    Base class for named objects that autogenerate properties from a class-level fields declaration
    and that track an object ID, a parent, children, a modified flag and the IO object used to read them.
    """

    _experimental = False

    # The name of the class attribute that subclasses use to autogenerate properties
    # This parameterization is supplied in case users would like to configure
    # the class attribute name to something domain-specific
    _fieldsname = '__fields__'

    _data_type_attr = 'data_type'

    # Subclasses use this class attribute to add properties to autogenerate
    # Autogenerated properties will store values in self.__field_values
    __fields__ = tuple()

    # This field is automatically set by __gather_fields before initialization.
    # It holds all the values in __fields__ for this class and its parent classes.
    __fieldsconf = tuple()

    _pconf_allowed_keys = {'name', 'doc', 'settable'}

    # Override the _setter factor function, so directives that apply to
    # Container do not get used on Data
    @classmethod
    def _setter(cls, field):
        """
        Make a setter function for creating a :py:func:`property`

        :param field: the field configuration dict (must contain 'name')
        :returns: the setter, or None if the field is configured with ``settable=False``
        """
        name = field['name']

        if not field.get('settable', True):
            return None

        def setter(self, val):
            # Setting None is a no-op, and a field can only be set once
            if val is None:
                return
            if name in self.fields:
                msg = "can't set attribute '%s' -- already set" % name
                raise AttributeError(msg)
            self.fields[name] = val

        return setter

    @classmethod
    def _getter(cls, field):
        """
        Make a getter function for creating a :py:func:`property`

        :param field: the field configuration dict; 'doc' (if present) becomes the getter docstring
        :returns: a getter that returns the stored field value, or None if the field is unset
        """
        doc = field.get('doc')
        name = field['name']

        def getter(self):
            return self.fields.get(name)

        setattr(getter, '__doc__', doc)
        return getter

    @staticmethod
    def _check_field_spec(field):
        """
        A helper function for __gather_fields to make sure we are always working
        with a dict specification and that the specification contains the correct keys

        :raises ValueError: if a dict specification has no 'name' key
        """
        tmp = field
        if isinstance(tmp, dict):
            if 'name' not in tmp:
                raise ValueError("must specify 'name' if using dict in __fields__")
        else:
            # A bare value is shorthand for a specification that only gives the name
            tmp = {'name': tmp}
        return tmp

    @classmethod
    def _check_field_spec_keys(cls, field_conf):
        """
        Validate that every key of a field specification is listed in ``_pconf_allowed_keys``.

        :raises ValueError: on the first unrecognized key
        """
        for k in field_conf:
            if k not in cls._pconf_allowed_keys:
                msg = ("Unrecognized key '%s' in %s config '%s' on %s"
                       % (k, cls._fieldsname, field_conf['name'], cls.__name__))
                raise ValueError(msg)

    @classmethod
    def _get_fields(cls):
        """Return the value of the class attribute named by ``_fieldsname``."""
        return getattr(cls, cls._fieldsname)

    @classmethod
    def _set_fields(cls, value):
        """Set the class attribute named by ``_fieldsname`` to *value*."""
        return setattr(cls, cls._fieldsname, value)

    @classmethod
    def get_fields_conf(cls):
        """Return the tuple of field configuration dicts gathered for this class."""
        return cls.__fieldsconf

    @ExtenderMeta.pre_init
    def __gather_fields(cls, name, bases, classdict):
        """
        This classmethod will be called during class declaration in the metaclass to automatically
        create setters and getters for fields that need to be exported

        :raises TypeError: if the fields class attribute is not a tuple
        """
        fields = cls._get_fields()
        if not isinstance(fields, tuple):
            msg = "'%s' must be of type tuple" % cls._fieldsname
            raise TypeError(msg)

        # check field specs and create map from field name to field conf dictionary
        fields_dict = OrderedDict()
        for f in fields:
            pconf = cls._check_field_spec(f)
            cls._check_field_spec_keys(pconf)
            fields_dict[pconf['name']] = pconf
        all_fields_conf = list(fields_dict.values())

        # check whether this class overrides __fields__
        if len(bases):
            # find highest base class that is an AbstractContainer (parent is higher than children)
            base_cls = None
            for base_cls in reversed(bases):
                if issubclass(base_cls, AbstractContainer):
                    break

            base_fields = base_cls._get_fields()  # tuple of field names from base class
            if base_fields is not fields:
                # check whether new fields spec already exists in base class
                fields_to_remove_from_base = list()
                for field_name in fields_dict:
                    if field_name in base_fields:
                        fields_to_remove_from_base.append(field_name)
                # prepend field specs from base class to fields list of this class
                # but only field specs that are not redefined in this class
                base_fields_conf = base_cls.get_fields_conf()  # tuple of fields configurations from base class
                base_fields_conf_to_add = list()
                for pconf in base_fields_conf:
                    if pconf['name'] not in fields_to_remove_from_base:
                        base_fields_conf_to_add.append(pconf)
                all_fields_conf[0:0] = base_fields_conf_to_add

        # create getter and setter if attribute does not already exist
        # if 'doc' not specified in __fields__, use doc from docval of __init__
        docs = {dv['name']: dv['doc'] for dv in get_docval(cls.__init__)}
        for field_conf in all_fields_conf:
            pname = field_conf['name']
            field_conf.setdefault('doc', docs.get(pname))
            if not hasattr(cls, pname):
                setattr(cls, pname, property(cls._getter(field_conf), cls._setter(field_conf)))

        cls._set_fields(tuple(field_conf['name'] for field_conf in all_fields_conf))
        cls.__fieldsconf = tuple(all_fields_conf)

    def __del__(self):
        # Make sure the reference counter for our read IO is being decremented
        try:
            del self.__read_io
            self.__read_io = None
        except AttributeError:
            # __read_io is only assigned at the end of __init__; if __init__ raised earlier
            # there is nothing to release and the finalizer must not raise
            pass

    def __new__(cls, *args, **kwargs):
        """
        Static method of the object class called by Python to create the object first and then
        __init__() is called to initialize the object's attributes.

        NOTE: this method is called directly from ObjectMapper.__new_container__ during the process of
        constructing the object from builders that are read from a file.
        """
        inst = super().__new__(cls)
        if cls._experimental:
            warn(_exp_warn_msg(cls))
        inst.__container_source = kwargs.pop('container_source', None)
        inst.__parent = None
        inst.__children = list()
        inst.__modified = True
        inst.__object_id = kwargs.pop('object_id', str(uuid4()))
        # this variable is being passed in from ObjectMapper.__new_container__ and is
        # reset to False in that method after the object has been initialized by __init__
        inst._in_construct_mode = kwargs.pop('in_construct_mode', False)
        inst.parent = kwargs.pop('parent', None)
        return inst

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'})
    def __init__(self, **kwargs):
        name = getargs('name', kwargs)
        if '/' in name:
            raise ValueError("name '" + name + "' cannot contain '/'")
        self.__name = name
        self.__field_values = dict()
        self.__read_io = None

    @property
    def read_io(self):
        """
        The :class:`~hdmf.backends.io.HDMFIO` object used for reading the container.

        This property will typically be None if this Container is not a root Container
        (i.e., if `parent` is not None). Use `get_read_io` instead if you want to retrieve the
        :class:`~hdmf.backends.io.HDMFIO` object used for reading from the parent container.
        """
        return self.__read_io

    @read_io.setter
    def read_io(self, value):
        """
        Set the io object used to read this container

        :param value: The :class:`~hdmf.backends.io.HDMFIO` object to use
        :raises ValueError: If io has already been set. We can't change the IO for a container.
        :raises TypeError: If value is not an instance of :class:`~hdmf.backends.io.HDMFIO`
        """
        # We do not want to import HDMFIO on the module level to avoid circular imports. Since we only need
        # it for type checking we import it here.
        from hdmf.backends.io import HDMFIO
        if not isinstance(value, HDMFIO):
            raise TypeError("io must be an instance of HDMFIO")
        if self.__read_io is not None:
            raise ValueError("io has already been set for this container (name=%s, type=%s)" %
                             (self.name, str(type(self))))
        else:
            self.__read_io = value

    def get_read_io(self):
        """
        Get the io object used to read this container.

        If `self.read_io` is None, this function will iterate through the parents and return the
        first `io` object found on a parent container

        :returns: The :class:`~hdmf.backends.io.HDMFIO` object used to read this container.
                  Returns None in case no io object is found, e.g., in case this container has
                  not been read from file.
        """
        curr_obj = self
        re_io = self.read_io
        while re_io is None and curr_obj.parent is not None:
            curr_obj = curr_obj.parent
            re_io = curr_obj.read_io
        return re_io

    @property
    def name(self):
        """
        The name of this Container
        """
        return self.__name

    @docval({'name': 'data_type', 'type': str, 'doc': 'the data_type to search for', 'default': None})
    def get_ancestor(self, **kwargs):
        """
        Traverse parent hierarchy and return first instance of the specified data_type

        If no data_type is given, the direct parent is returned. Returns None if no ancestor matches.
        """
        data_type = getargs('data_type', kwargs)
        if data_type is None:
            return self.parent
        p = self.parent
        while p is not None:
            if getattr(p, p._data_type_attr) == data_type:
                return p
            p = p.parent
        return None

    @property
    def fields(self):
        """
        The dict that stores the values of the autogenerated field properties of this object.

        The properties themselves are declared through the class attribute named by ``_fieldsname``,
        which holds a tuple of names or of dicts. Which dict keys are accepted is defined by
        ``_pconf_allowed_keys`` of the class. The keys used in this module are:

        1. name: The name of the field property
        2. child: A boolean value to set the parent/child relationship between the field property and the container.
        3. required_name: The name the field property must have such that `name` matches `required_name`.
        4. doc: Documentation of the field property
        5. settable: If true (the default), a setter function is created for the property.
        """
        return self.__field_values

    @property
    def object_id(self):
        """The object ID of this container; a new UUID string is generated if it is currently None."""
        if self.__object_id is None:
            self.__object_id = str(uuid4())
        return self.__object_id

    @docval({'name': 'recurse', 'type': bool,
             'doc': "whether or not to change the object ID of this container's children", 'default': True})
    def generate_new_id(self, **kwargs):
        """Changes the object ID of this Container and all of its children to a new UUID string."""
        recurse = getargs('recurse', kwargs)
        self.__object_id = str(uuid4())
        self.set_modified()
        if recurse:
            for c in self.children:
                c.generate_new_id(**kwargs)

    @property
    def modified(self):
        """Whether this container is currently flagged as modified."""
        return self.__modified

    @docval({'name': 'modified', 'type': bool,
             'doc': 'whether or not this Container has been modified', 'default': True})
    def set_modified(self, **kwargs):
        """Set the modified flag; setting it to True also flags the parent if the parent is a Container."""
        modified = getargs('modified', kwargs)
        self.__modified = modified
        if modified and isinstance(self.parent, Container):
            self.parent.set_modified()

    @property
    def children(self):
        """A tuple snapshot of the child containers of this container."""
        return tuple(self.__children)

    @docval({'name': 'child', 'type': 'Container',
             'doc': 'the child Container for this Container', 'default': None})
    def add_child(self, **kwargs):
        """Deprecated: set the ``parent`` attribute of the child instead."""
        warn(DeprecationWarning('add_child is deprecated. Set the parent attribute instead.'))
        child = getargs('child', kwargs)
        if child is not None:
            # if child.parent is a Container, then the mismatch between child.parent and parent
            # is used to make a soft/external link from the parent to a child elsewhere
            # if child.parent is not a Container, it is either None or a Proxy and should be set to self
            if not isinstance(child.parent, AbstractContainer):
                # actually add the child to the parent in parent setter
                child.parent = self
        else:
            warn('Cannot add None as child to a container %s' % self.name)

    @classmethod
    def type_hierarchy(cls):
        """Return the method resolution order of this class."""
        return cls.__mro__

    @property
    def container_source(self):
        """
        The source of this Container
        """
        return self.__container_source

    @container_source.setter
    def container_source(self, source):
        # The source can only be assigned once
        if self.__container_source is not None:
            raise Exception('cannot reassign container_source')
        self.__container_source = source

    @property
    def parent(self):
        """
        The parent Container of this Container
        """
        # do it this way because __parent may not exist yet (not set in constructor)
        return getattr(self, '_AbstractContainer__parent', None)

    @parent.setter
    def parent(self, parent_container):
        """
        Set the parent of this container and register this container as a child of the new parent.

        :raises ValueError: if the current parent is an AbstractContainer (parents cannot be reassigned),
                            or if the current parent is a proxy and *parent_container* is None
        """
        if self.parent is parent_container:
            return

        if self.parent is not None:
            if isinstance(self.parent, AbstractContainer):
                raise ValueError(('Cannot reassign parent to Container: %s. '
                                  'Parent is already: %s.' % (repr(self), repr(self.parent))))
            else:
                if parent_container is None:
                    raise ValueError("Got None for parent of '%s' - cannot overwrite Proxy with NoneType" % repr(self))
                # NOTE this assumes isinstance(parent_container, Proxy) but we get a circular import
                # if we try to do that
                if self.parent.matches(parent_container):
                    self.__parent = parent_container
                    parent_container.__children.append(self)
                    parent_container.set_modified()
                else:
                    self.__parent.add_candidate(parent_container)
        else:
            self.__parent = parent_container
            if isinstance(parent_container, Container):
                parent_container.__children.append(self)
                parent_container.set_modified()
        # Checked by class name rather than isinstance; DynamicTableRegion is not defined in this module
        for child in self.children:
            if type(child).__name__ == "DynamicTableRegion":
                if child.table.parent is None:
                    msg = "The table for this DynamicTableRegion has not been added to the parent."
                    warn(msg)

    def _remove_child(self, child):
        """Remove a child Container. Intended for use in subclasses that allow dynamic addition of child Containers.

        :raises ValueError: if *child* is not an AbstractContainer or is not a child of this container
        """
        if not isinstance(child, AbstractContainer):
            raise ValueError('Cannot remove non-AbstractContainer object from children.')
        if child not in self.children:
            raise ValueError("%s '%s' is not a child of %s '%s'." % (child.__class__.__name__, child.name,
                                                                     self.__class__.__name__, self.name))
        child.__parent = None
        self.__children.remove(child)
        child.set_modified()
        self.set_modified()

    def reset_parent(self):
        """Reset the parent of this Container to None and remove the Container from the children of its parent.

        Use with caution. This can result in orphaned containers and broken links.

        :raises ValueError: if the current parent is not None and not an AbstractContainer
        """
        if self.parent is None:
            return
        elif isinstance(self.parent, AbstractContainer):
            self.parent._remove_child(self)
        else:
            raise ValueError("Cannot reset parent when parent is not an AbstractContainer: %s" % repr(self.parent))
class Container(AbstractContainer):
    """A container that can contain other containers and has special functionality for printing."""

    _pconf_allowed_keys = {'name', 'child', 'required_name', 'doc', 'settable'}

    @classmethod
    def _setter(cls, field):
        """Build the setter for the given field to be added to the class during class declaration.

        The base setter is wrapped, in this order, by a required-name check (if 'required_name' is
        configured) and by a step that makes self the parent of the assigned value(s) (if 'child' is
        configured). The outermost setter is returned.
        """
        current = AbstractContainer._setter(field)

        # wrap with a check that the assigned AbstractContainer has name == required_name
        required_name = field.get('required_name', None)
        if required_name is not None:
            setter_before_name_check = current

            def container_setter(self, val):
                if val is not None:
                    if not isinstance(val, AbstractContainer):
                        msg = ("Field '%s' on %s has a required name and must be a subclass of AbstractContainer."
                               % (field['name'], self.__class__.__name__))
                        raise ValueError(msg)
                    if val.name != required_name:
                        msg = ("Field '%s' on %s must be named '%s'."
                               % (field['name'], self.__class__.__name__, required_name))
                        raise ValueError(msg)
                setter_before_name_check(self, val)  # delegate to the wrapped setter

            current = container_setter

        # wrap with a step that accepts a value or a tuple, list, or dict of values and adopts each one
        if field.get('child', False):
            setter_before_adoption = current

            def container_setter(self, val):
                setter_before_adoption(self, val)  # delegate to the wrapped setter
                if val is None:
                    return
                if isinstance(val, (tuple, list)):
                    candidates = val
                elif isinstance(val, dict):
                    candidates = val.values()
                else:
                    candidates = [val]
                for v in candidates:
                    if isinstance(v.parent, Container):
                        # the ObjectMapper will create a link from self (parent) to v (child with existing parent)
                        # still need to mark self as modified
                        self.set_modified()
                    else:
                        v.parent = self

            current = container_setter

        return current

    def __repr__(self):
        cls = self.__class__
        pieces = ["%s %s.%s at 0x%d" % (self.name, cls.__module__, cls.__name__, id(self))]
        if len(self.fields):
            pieces.append("\nFields:\n")
        for key in sorted(self.fields):  # sorted to enable tests
            value = self.fields[key]
            if not hasattr(value, '__len__'):
                pieces.append(" {}: {}\n".format(key, value))
                continue
            # sized values are only listed when they are non-empty / truthy
            if isinstance(value, (np.ndarray, list, tuple)):
                shown = len(value) > 0
            else:
                shown = bool(value)
            if shown:
                pieces.append(" {}: {}\n".format(key, self.__smart_str(value, 1)))
        return "".join(pieces)

    def _repr_html_(self):
        """Return an HTML rendering of this container: style sheet, copy-to-clipboard script and field tree."""
        CSS_STYLE = """
        <style>
        .container-fields {
        font-family: "Open Sans", Arial, sans-serif;
        }
        .container-fields .field-value {
        color: #00788E;
        }
        .container-fields details > summary {
        cursor: pointer;
        display: list-item;
        }
        .container-fields details > summary:hover {
        color: #0A6EAA;
        }
        </style>
        """

        JS_SCRIPT = """
        <script>
        function copyToClipboard(text) {
        navigator.clipboard.writeText(text).then(function() {
        console.log('Copied to clipboard: ' + text);
        }, function(err) {
        console.error('Could not copy text: ', err);
        });
        }

        document.addEventListener('DOMContentLoaded', function() {
        let fieldKeys = document.querySelectorAll('.container-fields .field-key');
        fieldKeys.forEach(function(fieldKey) {
        fieldKey.addEventListener('click', function() {
        let accessCode = fieldKey.getAttribute('title').replace('Access code: ', '');
        copyToClipboard(accessCode);
        });
        });
        });
        </script>
        """
        class_name = self.__class__.__name__
        # avoid repeating the class name when the container is named after its class
        header_text = self.name if self.name == class_name else f"{self.name} ({self.__class__.__name__})"
        header = f"<div class='container-header'><div class='xr-obj-type'><h3>{header_text}</h3></div></div>"
        return "".join([
            CSS_STYLE,
            JS_SCRIPT,
            "<div class='container-wrap'>",
            header,
            self._generate_html_repr(self.fields),
            "</div>",
        ])

    def _generate_html_repr(self, fields, level=0, access_code=".fields"):
        """Recursively render *fields* (a dict, list or numpy array) as nested HTML.

        :param fields: the value to render; other types render as an empty string
        :param level: nesting depth, used to compute the left margin
        :param access_code: the attribute/index path to *fields*, placed in the ``title`` attributes
        """
        chunks = []

        if isinstance(fields, dict):
            for key, value in fields.items():
                current_access_code = f"{access_code}['{key}']"
                if not (isinstance(value, (list, dict, np.ndarray)) or hasattr(value, "fields")):
                    # leaf value: a single key/value line
                    chunks.append(
                        f'<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-key"'
                        f' title="{current_access_code}">{key}:</span> <span class="field-value">{value}</span></div>'
                    )
                    continue

                label = key
                if isinstance(value, dict):
                    label += f" ({len(value)})"
                chunks.append(
                    f'<details><summary style="display: list-item; margin-left: {level * 20}px;" '
                    f'class="container-fields field-key" title="{current_access_code}"><b>{label}</b></summary>'
                )
                if hasattr(value, "fields"):
                    # descend into the fields of a nested object
                    value = value.fields
                    current_access_code = current_access_code + ".fields"
                chunks.append(self._generate_html_repr(value, level + 1, current_access_code))
                chunks.append("</details>")
        elif isinstance(fields, list):
            for index, item in enumerate(fields):
                current_access_code = f"{access_code}[{index}]"
                chunks.append(
                    f'<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-value"'
                    f' title="{current_access_code}">{str(item)}</span></div>'
                )
        elif isinstance(fields, np.ndarray):
            str_ = str(fields).replace("\n", "</br>")
            chunks.append(
                f'<div style="margin-left: {level * 20}px;" class="container-fields">{str_}</div>'
            )

        return "".join(chunks)

    @staticmethod
    def __smart_str(v, num_indent):
        """
        Return a compact string representation of *v*.

        * list/tuple: if the first element is an AbstractContainer, one element per line; otherwise the
          numpy string form, falling back to one element per line if numpy raises ValueError
        * dict: each sorted key followed by the type of its value
        * set: the sorted elements, one per line
        * AbstractContainer: its name followed by its type
        * anything else: the built-in str()

        Parameters
        ----------
        v
            the value to format
        num_indent
            the current indentation level

        Returns
        -------
        str

        """
        if isinstance(v, (list, tuple)):
            if len(v) and isinstance(v[0], AbstractContainer):
                return Container.__smart_str_list(v, num_indent, '(')
            try:
                return str(np.asarray(v))
            except ValueError:
                return Container.__smart_str_list(v, num_indent, '(')
        if isinstance(v, dict):
            return Container.__smart_str_dict(v, num_indent)
        if isinstance(v, set):
            return Container.__smart_str_list(sorted(v), num_indent, '{')
        if isinstance(v, AbstractContainer):
            return "{} {}".format(v.name, type(v))
        return str(v)

    @staticmethod
    def __smart_str_list(str_list, num_indent, left_br):
        """Format the elements of *str_list* one per line between *left_br* and its matching closing bracket."""
        if left_br == '(':
            right_br = ')'
        elif left_br == '{':
            right_br = '}'
        if not str_list:
            return left_br + ' ' + right_br
        outer_pad = num_indent * 2 * ' '
        inner_pad = (num_indent + 1) * 2 * ' '
        rendered = [Container.__smart_str(item, num_indent + 1) for item in str_list]
        body = (',\n' + inner_pad).join(rendered)
        return left_br + '\n' + inner_pad + body + '\n' + outer_pad + right_br

    @staticmethod
    def __smart_str_dict(d, num_indent):
        """Format the sorted keys of *d*, each followed by the type of its value, one per line in braces."""
        left_br = '{'
        right_br = '}'
        if not len(d):
            return left_br + ' ' + right_br
        outer_pad = num_indent * 2 * ' '
        inner_pad = (num_indent + 1) * 2 * ' '
        entries = [
            Container.__smart_str(key, num_indent + 1) + ' ' + str(type(d[key]))
            for key in sorted(d.keys())
        ]
        body = (',\n' + inner_pad).join(entries)
        return left_br + '\n' + inner_pad + body + '\n' + outer_pad + right_br
class Data(AbstractContainer):
    """
    A class for representing dataset containers
    """

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'},
            {'name': 'data', 'type': ('scalar_data', 'array_data', 'data'), 'doc': 'the source of the data'},
            {'name': 'term_set', 'type': TermSet, 'doc': 'the set of terms used to validate data on add',
             'default': None})
    def __init__(self, **kwargs):
        """
        :raises ValueError: if a term set is given and any element of data fails its validation
        """
        data = popargs('data', kwargs)
        self.term_set = popargs('term_set', kwargs)
        super().__init__(**kwargs)
        if self.term_set is not None:
            # Validate every element exactly once so that each invalid term is reported once
            bad_data = [term for term in data if not self.term_set.validate(term=term)]
            if bad_data:
                msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data]))
                raise ValueError(msg)
        self.__data = data

    @property
    def data(self):
        """The data held by this object."""
        return self.__data

    @property
    def shape(self):
        """
        Get the shape of the data represented by this container
        :return: Shape tuple
        :rtype: tuple of ints
        """
        return get_data_shape(self.__data)

    @docval({'name': 'dataio', 'type': DataIO, 'doc': 'the DataIO to apply to the data held by this Data'})
    def set_dataio(self, **kwargs):
        """
        Apply DataIO object to the data held by this Data object
        """
        dataio = getargs('dataio', kwargs)
        # The DataIO wraps the current data and then takes its place
        dataio.data = self.__data
        self.__data = dataio

    @docval({'name': 'func', 'type': types.FunctionType, 'doc': 'a function to transform *data*'})
    def transform(self, **kwargs):
        """
        Transform data from the current underlying state.

        This function can be used to permanently load data from disk, or convert to a different
        representation, such as a torch.Tensor

        :returns: this object, with its data replaced by the result of the function
        """
        func = getargs('func', kwargs)
        self.__data = func(self.__data)
        return self

    def __bool__(self):
        """Return False if data is None, empty (for ndarray, tuple and list) or falsy; True otherwise."""
        if self.data is not None:
            if isinstance(self.data, (np.ndarray, tuple, list)):
                # use the length: the truth value of a multi-element ndarray is ambiguous
                return len(self.data) != 0
            if self.data:
                return True
        return False

    def __len__(self):
        return len(self.__data)

    def __getitem__(self, args):
        return self.get(args)

    def get(self, args):
        """Index into the data with *args*.

        A tuple/list held as data and indexed with a tuple, list or ndarray is indexed element by
        element and returned as a list.
        """
        if isinstance(self.data, (tuple, list)) and isinstance(args, (tuple, list, np.ndarray)):
            return [self.data[i] for i in args]
        if isinstance(self.data, h5py.Dataset) and isinstance(args, np.ndarray):
            # This is needed for h5py 2.9 compatibility
            args = args.tolist()
        return self.data[args]

    def append(self, arg):
        """Append *arg* to the data.

        :raises ValueError: if a term set is configured and *arg* fails its validation
        """
        if self.term_set is None:
            self.__data = append_data(self.__data, arg)
        else:
            if self.term_set.validate(term=arg):
                self.__data = append_data(self.__data, arg)
            else:
                msg = ('"%s" is not in the term set.' % arg)
                raise ValueError(msg)

    def extend(self, arg):
        """
        The extend_data method adds all the elements of the iterable arg to the
        end of the data of this Data container.

        With a term set configured, elements are appended one at a time: valid elements are added
        even if others are rejected, and the rejected ones are reported together afterwards.

        :param arg: The iterable to add to the end of this VectorData
        :raises ValueError: if a term set is configured and any element fails its validation
        """
        if self.term_set is None:
            self.__data = extend_data(self.__data, arg)
        else:
            bad_data = []
            for item in arg:
                try:
                    self.append(item)
                except ValueError:
                    bad_data.append(item)
            if len(bad_data) != 0:
                msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data]))
                raise ValueError(msg)
class DataRegion(Data):
    """A Data subclass that declares the abstract ``data`` and ``region`` properties."""

    @property
    @abstractmethod
    def data(self):
        """
        The target data that this region applies to
        """
        return None

    @property
    @abstractmethod
    def region(self):
        """
        The region that indexes into data e.g. slice or list of indices
        """
        return None
829class MultiContainerInterface(Container):
830 """Class that dynamically defines methods to support a Container holding multiple Containers of the same type.
832 To use, extend this class and create a dictionary as a class attribute with any of the following keys:
833 * 'attr' to name the attribute that stores the Container instances
834 * 'type' to provide the Container object type (type or list/tuple of types, type can be a docval macro)
835 * 'add' to name the method for adding Container instances
836 * 'get' to name the method for getting Container instances
837 * 'create' to name the method for creating Container instances (only if a single type is specified)
839 If the attribute does not exist in the class, it will be generated. If it does exist, it should behave like a dict.
841 The keys 'attr', 'type', and 'add' are required.
842 """
def __new__(cls, *args, **kwargs):
    """Refuse to instantiate the base class itself or a subclass that lacks ``__clsconf__``."""
    if cls is MultiContainerInterface:
        raise TypeError("Can't instantiate class MultiContainerInterface.")
    if hasattr(cls, '__clsconf__'):
        return super().__new__(cls, *args, **kwargs)
    msg = ("MultiContainerInterface subclass %s is missing __clsconf__ attribute. Please check that "
           "the class is properly defined." % cls.__name__)
    raise TypeError(msg)
@staticmethod
def __add_article(noun):
    """Prefix *noun* with 'a' or 'an'.

    A tuple is reduced to its first element and a class to its ``__name__`` before the article is chosen.
    """
    word = noun[0] if isinstance(noun, tuple) else noun
    if isinstance(word, type):
        word = word.__name__
    article = 'an' if word[0] in 'aeiouAEIOU' else 'a'
    return '%s %s' % (article, word)
@staticmethod
def __join(argtype):
    """Return a grammatical string representation of a list or tuple of classes or text.

    Examples:
        cls.__join(Container) returns "Container"
        cls.__join((Container, )) returns "Container"
        cls.__join((Container, Data)) returns "Container or Data"
        cls.__join((Container, Data, Subcontainer)) returns "Container, Data, or Subcontainer"
    """

    def tostr(x):
        # classes are shown by name, anything else is used as given
        return x.__name__ if isinstance(x, type) else x

    if not isinstance(argtype, (list, tuple)):
        return tostr(argtype)

    names = [tostr(item) for item in argtype]
    count = len(names)
    if count == 1:
        return names[0]
    if count == 2:
        return " or ".join(names)
    leading = ", ".join(names[:-1])
    return leading + ', or ' + names[-1]
@classmethod
def __make_get(cls, func_name, attr_name, container_type):
    """Create the getter method named *func_name* that looks up a container by name in *attr_name*."""
    type_label = cls.__join(container_type)
    doc = "Get %s from this %s" % (cls.__add_article(container_type), cls.__name__)

    @docval({'name': 'name', 'type': str, 'doc': 'the name of the %s' % type_label,
             'default': None},
            rtype=container_type, returns='the %s with the given name' % type_label,
            func_name=func_name, doc=doc)
    def _func(self, **kwargs):
        name = getargs('name', kwargs)
        d = getattr(self, attr_name)

        if name is not None:
            ret = d.get(name)
            if ret is None:
                msg = "'%s' not found in %s of %s '%s'." % (name, attr_name, cls.__name__, self.name)
                raise KeyError(msg)
            return ret

        # without a name the lookup is only unambiguous if exactly one element is stored
        if len(d) > 1:
            msg = ("More than one element in %s of %s '%s' -- must specify a name."
                   % (attr_name, cls.__name__, self.name))
            raise ValueError(msg)
        if len(d) == 0:
            msg = "%s of %s '%s' is empty." % (attr_name, cls.__name__, self.name)
            raise ValueError(msg)
        return next(iter(d.values()), None)

    return _func
@classmethod
def __make_getitem(cls, attr_name, container_type):
    """Build the ``__getitem__`` method that looks up one item stored under ``attr_name``.

    The lookup rules are the same as for the named getter; only the error messages
    differ (they do not mention the attribute name):

    - with a name, the item stored under that name is returned, or ``KeyError`` is
      raised if the lookup yields ``None``;
    - without a name, the sole stored item is returned; ``ValueError`` is raised if the
      collection is empty or holds more than one item.
    """
    doc = "Get %s from this %s" % (cls.__add_article(container_type), cls.__name__)
    type_label = cls.__join(container_type)

    @docval({'name': 'name', 'type': str, 'doc': 'the name of the %s' % type_label,
             'default': None},
            rtype=container_type, returns='the %s with the given name' % type_label,
            func_name='__getitem__', doc=doc)
    def _func(self, **kwargs):
        name = getargs('name', kwargs)
        items = getattr(self, attr_name)

        if name is not None:
            found = items.get(name)
            if found is None:
                msg = "'%s' not found in %s '%s'." % (name, cls.__name__, self.name)
                raise KeyError(msg)
            return found

        # no name given: the request is only unambiguous when exactly one item is stored
        if len(items) > 1:
            msg = ("More than one %s in %s '%s' -- must specify a name."
                   % (cls.__join(container_type), cls.__name__, self.name))
            raise ValueError(msg)
        if len(items) == 0:
            msg = "%s '%s' is empty." % (cls.__name__, self.name)
            raise ValueError(msg)
        return next(iter(items.values()))

    return _func
@classmethod
def __make_add(cls, func_name, attr_name, container_type):
    """Build the "add" method that stores one or more items under ``attr_name``.

    The returned function is wrapped with ``docval`` under the name ``func_name``. Its
    single argument (named ``attr_name``) may be one ``container_type`` instance, a
    list/tuple of them, or a dict whose values are added. The argument is returned
    unchanged.

    Raises (from the generated method):
        ValueError: if an item's name is already present in the collection. The check
            is made before the item or this container is touched, so a rejected item
            is not re-parented and this container is not marked as modified for it.
            Items added earlier in the same call stay added.
    """
    doc = "Add one or multiple %s objects to this %s" % (cls.__join(container_type), cls.__name__)

    @docval({'name': attr_name, 'type': (list, tuple, dict, container_type),
             'doc': 'one or multiple %s objects to add to this %s' % (cls.__join(container_type), cls.__name__)},
            func_name=func_name, doc=doc)
    def _func(self, **kwargs):
        container = getargs(attr_name, kwargs)
        if isinstance(container, container_type):
            containers = [container]
        elif isinstance(container, dict):
            containers = container.values()
        else:
            containers = container
        d = getattr(self, attr_name)
        for tmp in containers:
            # validate first: nothing may be mutated for an item that is going to be rejected
            if tmp.name in d:
                msg = "'%s' already exists in %s '%s'" % (tmp.name, cls.__name__, self.name)
                raise ValueError(msg)
            if not isinstance(tmp.parent, Container):
                tmp.parent = self
            else:
                # the ObjectMapper will create a link from self (parent) to tmp (child with existing parent)
                # still need to mark self as modified
                self.set_modified()
            d[tmp.name] = tmp
        return container

    return _func
@classmethod
def __make_create(cls, func_name, add_name, container_type):
    """Build the "create" method that constructs a ``container_type`` and adds it.

    The generated method reuses the docval specification of ``container_type.__init__``
    for its own arguments, instantiates ``container_type`` with the received keyword
    arguments, passes the new object to the method named ``add_name`` and returns it.
    """
    doc = "Create %s object and add it to this %s" % (cls.__add_article(container_type), cls.__name__)
    init_args = get_docval(container_type.__init__)
    returns = "the %s object that was created" % cls.__join(container_type)

    @docval(*init_args, func_name=func_name, doc=doc, returns=returns, rtype=container_type)
    def _func(self, **kwargs):
        created = container_type(**kwargs)
        add_method = getattr(self, add_name)
        add_method(created)
        return created

    return _func
@classmethod
def __make_constructor(cls, clsconf):
    """Build a default ``__init__`` from the list of configuration dicts ``clsconf``.

    The generated constructor takes one optional argument per configured ``attr``
    (a single item, a list/tuple, or a dict; defaulting to an empty dict) plus an
    optional ``name`` that defaults to the class name. After calling the parent
    constructor with the name, each received collection is handed to the ``add``
    method configured for it.
    """
    args = [
        {'name': conf['attr'], 'type': (list, tuple, dict, conf['type']),
         'doc': '%s to store in this interface' % cls.__join(conf['type']), 'default': dict()}
        for conf in clsconf
    ]
    args.append({'name': 'name', 'type': str, 'doc': 'the name of this container', 'default': cls.__name__})

    @docval(*args, func_name='__init__')
    def _func(self, **kwargs):
        super().__init__(name=kwargs['name'])
        for conf in clsconf:
            attr_name, add_name = conf['attr'], conf['add']
            items = popargs(attr_name, kwargs)
            getattr(self, add_name)(items)

    return _func
@classmethod
def __make_getter(cls, attr):
    """Make a getter function for creating a :py:func:`property`"""

    def _func(self):
        # The LabelledDict for this attribute is created lazily on first access rather
        # than in __init__, because __init__ may or may not be overridden in custom
        # classes and dynamically generated classes.
        if attr not in self.fields:
            def _detach(child):
                # only children whose parent is this very container are removed from it
                if child.parent is self:
                    self._remove_child(child)

            self.fields[attr] = LabelledDict(attr, remove_callable=_detach)

        return self.fields.get(attr)

    return _func
@classmethod
def __make_setter(cls, add_name):
    """Make a setter function for creating a :py:func:`property`"""

    @docval({'name': 'val', 'type': (list, tuple, dict), 'doc': 'the sub items to add', 'default': None})
    def _func(self, **kwargs):
        val = getargs('val', kwargs)
        # assigning None is a no-op; anything else is forwarded to the add method
        if val is not None:
            getattr(self, add_name)(val)

    return _func
@ExtenderMeta.pre_init
def __build_class(cls, name, bases, classdict):
    """Verify __clsconf__ and create methods based on __clsconf__.

    This method is called prior to __new__ and __init__ during class declaration in the metaclass.

    ``__clsconf__`` may be a single dict or a list of dicts; anything else raises
    ``TypeError``. Classes without ``__clsconf__`` are left untouched.
    """
    if not hasattr(cls, '__clsconf__'):
        return

    raw_conf = cls.__clsconf__
    if isinstance(raw_conf, dict):
        multi, clsconf = False, [raw_conf]
    elif isinstance(raw_conf, list):
        multi, clsconf = True, raw_conf
    else:
        raise TypeError("'__clsconf__' for MultiContainerInterface subclass %s must be a dict or a list of "
                        "dicts." % cls.__name__)

    for conf_index, conf_dict in enumerate(clsconf):
        cls.__build_conf_methods(conf_dict, conf_index, multi)

    # square bracket access is only unambiguous when a single conf type is defined
    if len(clsconf) == 1:
        only_conf = clsconf[0]
        attr = only_conf.get('attr')
        container_type = only_conf.get('type')
        cls.__getitem__ = cls.__make_getitem(attr, container_type)

    # generate a constructor only when the class body does not define its own
    if '__init__' not in cls.__dict__:
        cls.__init__ = cls.__make_constructor(clsconf)
@classmethod
def __build_conf_methods(cls, conf_dict, conf_index, multi):
    """Create the property and methods described by one ``__clsconf__`` entry.

    ``conf_dict`` must provide the keys 'add', 'attr' and 'type'; 'create' and 'get'
    are optional. ``conf_index`` is only used in error messages, and only when
    ``multi`` is true (i.e. ``__clsconf__`` was given as a list).

    Raises:
        ValueError: if a required key is missing, or if 'create' is given while
            'type' is not a single type.
    """

    def _fail(msg):
        # every configuration error is reported the same way, with the index of the
        # offending entry appended when there are several entries
        if multi:
            msg += " at index %d" % conf_index
        raise ValueError(msg)

    # required keys, validated in this order
    required = (
        ('add', "MultiContainerInterface subclass %s is missing 'add' key in __clsconf__"),
        ('attr', "MultiContainerInterface subclass %s is missing 'attr' key in __clsconf__"),
        ('type', "MultiContainerInterface subclass %s is missing 'type' key in __clsconf__"),
    )
    values = {}
    for key, template in required:
        values[key] = conf_dict.get(key)
        if values[key] is None:
            _fail(template % cls.__name__)
    add, attr, container_type = values['add'], values['attr'], values['type']

    # create property with the name given in 'attr' only if the attribute is not already defined
    if not hasattr(cls, attr):
        doc = "a dictionary containing the %s in this %s" % (cls.__join(container_type), cls.__name__)
        prop = property(cls.__make_getter(attr), cls.__make_setter(add), None, doc)
        setattr(cls, attr, prop)

    # create the add method
    setattr(cls, add, cls.__make_add(add, attr, container_type))

    # create the create method, only if a single container type is specified
    create = conf_dict.get('create')
    if create is not None:
        if not isinstance(container_type, type):
            _fail(("Cannot specify 'create' key in __clsconf__ for MultiContainerInterface subclass %s "
                   "when 'type' key is not a single type") % cls.__name__)
        setattr(cls, create, cls.__make_create(create, add, container_type))

    # create the get method
    get = conf_dict.get('get')
    if get is not None:
        setattr(cls, get, cls.__make_get(get, attr, container_type))
class Row(object, metaclass=ExtenderMeta):
    """
    A class for representing rows from a Table.

    The Table class can be indicated with the __table__. Doing so
    will set constructor arguments for the Row class and ensure that
    Row.idx is set appropriately when a Row is added to the Table. It will
    also add functionality to the Table class for getting Row objects.

    A Row created without a table stays detached (``idx`` is None) until a
    table is assigned to it; assigning the table then adds the row to it.

    Note, the Row class is not needed for working with Table objects. This
    is merely convenience functionality for working with Tables.
    """

    __table__ = None

    @property
    def idx(self):
        """The index of this row in its respective Table"""
        return self.__idx

    @idx.setter
    def idx(self, val):
        # the index is write-once: it can only be assigned while it is still None
        if self.__idx is None:
            self.__idx = val
        else:
            raise ValueError("cannot reset the ID of a row object")

    @property
    def table(self):
        """The Table this Row comes from"""
        return self.__table

    @table.setter
    def table(self, val):
        if val is not None:
            self.__table = val
        # A row without an index has not been stored yet, so add it to its table now.
        # This is skipped while no table is known at all; dereferencing the missing
        # table here used to raise AttributeError for rows built without table and idx.
        if self.idx is None and self.__table is not None:
            self.idx = self.__table.add_row(**self.todict())

    @ExtenderMeta.pre_init
    def __build_row_class(cls, name, bases, classdict):
        """Generate ``__init__`` and ``todict`` for a Row subclass from its table's columns.

        Subclasses must set ``__table__``; otherwise ValueError is raised. The table
        class is told about the row class through its ``__rowclass__`` attribute.
        """
        table_cls = getattr(cls, '__table__', None)
        if table_cls is not None:
            columns = getattr(table_cls, '__columns__')
            if cls.__init__ == bases[-1].__init__:  # check if __init__ is overridden
                # copy so that the table's own column specs are never shared with this signature
                func_args = list(deepcopy(columns))
                # NOTE(review): these two specs use a 'help' key while the column specs
                # elsewhere in this file use 'doc' -- confirm docval accepts this
                func_args.append({'name': 'table', 'type': Table, 'default': None,
                                  'help': 'the table this row is from'})
                func_args.append({'name': 'idx', 'type': int, 'default': None,
                                  'help': 'the index for this row'})

                @docval(*func_args)
                def __init__(self, **kwargs):
                    super(cls, self).__init__()
                    table, idx = popargs('table', 'idx', kwargs)
                    self.__keys = list()
                    self.__idx = None
                    self.__table = None
                    # everything left in kwargs is a column value
                    for k, v in kwargs.items():
                        self.__keys.append(k)
                        setattr(self, k, v)
                    # idx must be set before table: the table setter adds the row
                    # to the table only if no index has been assigned
                    self.idx = idx
                    self.table = table

                setattr(cls, '__init__', __init__)

                def todict(self):
                    return {k: getattr(self, k) for k in self.__keys}

                setattr(cls, 'todict', todict)

            # set this so Table.row gets set when a Table is instantiated
            table_cls.__rowclass__ = cls
        else:
            if bases != (object,):
                raise ValueError('__table__ must be set if sub-classing Row')

    def __eq__(self, other):
        """Rows are equal when they have the same index in the very same table object."""
        # let Python fall back to its default handling for unrelated types
        # instead of failing on a missing 'idx' attribute
        if not isinstance(other, Row):
            return NotImplemented
        return self.idx == other.idx and self.table is other.table

    def __str__(self):
        # a detached row has neither an index nor a table to name
        idx_str = '%i' % self.idx if self.idx is not None else 'None'
        table_name = self.table.name if self.table is not None else None
        return "Row(%s, %s) = %s" % (idx_str, table_name, str(self.todict()))
class RowGetter:
    """
    A simple class for providing __getitem__ functionality that returns
    Row objects to a Table.

    Row objects are built with the table's ``__rowclass__`` and remembered per
    index, so asking for the same index again returns the same object.
    """

    def __init__(self, table):
        self.table = table
        self.cache = dict()

    def __getitem__(self, idx):
        cached = self.cache.get(idx)
        if cached is not None:
            return cached

        # not built yet: read the raw values and wrap them in a row object
        values = self.table[idx]
        fresh = self.table.__rowclass__(*values, table=self.table, idx=idx)
        self.cache[idx] = fresh
        return fresh
class Table(Data):
    r'''
    Subclasses should specify the class attribute \_\_columns\_\_.

    This should be a list of dictionaries with the following keys:

    - ``name`` the column name
    - ``type`` the type of data in this column
    - ``doc`` a brief description of what gets stored in this column

    For reference, this list of dictionaries will be used with docval to autogenerate
    the ``add_row`` method for adding data to this table.

    If \_\_columns\_\_ is not specified, no custom ``add_row`` method will be added.

    The class attribute \_\_defaultname\_\_ can also be set to specify a default name
    for the table class. If \_\_defaultname\_\_ is not specified, then ``name`` will
    need to be specified when the class is instantiated.

    A Table class can be paired with a Row class for conveniently working with rows of
    a Table. This pairing must be indicated in the Row class implementation. See Row
    for more details.
    '''

    # This class attribute is used to indicate which Row class should be used when
    # adding RowGetter functionality to the Table.
    __rowclass__ = None

    @ExtenderMeta.pre_init
    def __build_table_class(cls, name, bases, classdict):
        """Generate ``__init__`` and ``add_row`` for subclasses that declare ``__columns__``.

        Also stores a column-name -> position mapping on the class as ``__colidx__``.
        A generated method is only installed when the subclass has not overridden it.
        """
        if hasattr(cls, '__columns__'):
            columns = getattr(cls, '__columns__')

            colidx = {col['name']: i for i, col in enumerate(columns)}
            setattr(cls, '__colidx__', colidx)

            if cls.__init__ == bases[-1].__init__:  # check if __init__ is overridden
                name_spec = {'name': 'name', 'type': str, 'doc': 'the name of this table'}
                defname = getattr(cls, '__defaultname__', None)
                if defname is not None:
                    name_spec['default'] = defname  # override the name with the default name if present

                @docval(name_spec,
                        {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'the data in this table',
                         'default': list()})
                def __init__(self, **kwargs):
                    name, data = getargs('name', 'data', kwargs)
                    colnames = [i['name'] for i in columns]
                    super(cls, self).__init__(colnames, name, data)

                setattr(cls, '__init__', __init__)

            if cls.add_row == bases[-1].add_row:  # check if add_row is overridden

                @docval(*columns)
                def add_row(self, **kwargs):
                    return super(cls, self).add_row(kwargs)

                setattr(cls, 'add_row', add_row)

    @docval({'name': 'columns', 'type': (list, tuple), 'doc': 'a list of the columns in this table'},
            {'name': 'name', 'type': str, 'doc': 'the name of this container'},
            {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'the source of the data', 'default': list()})
    def __init__(self, **kwargs):
        self.__columns = tuple(popargs('columns', kwargs))
        # column name -> position within a row, for this instance's columns
        self.__col_index = {name: idx for idx, name in enumerate(self.__columns)}
        if getattr(self, '__rowclass__') is not None:
            self.row = RowGetter(self)
        super().__init__(**kwargs)

    @property
    def columns(self):
        """The column names of this table, as a tuple."""
        return self.__columns

    @docval({'name': 'values', 'type': dict, 'doc': 'the values for each column'})
    def add_row(self, **kwargs):
        """Append one row and return its index.

        A value that is a Row object is stored as that row's ``idx``.

        Raises:
            ValueError: if the underlying data is not a list.
            KeyError: if ``values`` lacks an entry for one of the columns.
        """
        values = getargs('values', kwargs)
        if not isinstance(self.data, list):
            msg = 'Cannot append row to %s' % type(self.data)
            raise ValueError(msg)
        ret = len(self.data)
        row = [values[col] for col in self.columns]
        row = [v.idx if isinstance(v, Row) else v for v in row]
        self.data.append(tuple(row))
        return ret

    def which(self, **kwargs):
        '''
        Query a table

        Takes exactly one ``column=value`` keyword argument and returns the list of
        indices of the rows whose value in that column equals ``value``.

        Raises ValueError if not exactly one column is given and KeyError if the
        column does not exist.
        '''
        if len(kwargs) != 1:
            raise ValueError("only one column can be queried")
        colname, value = kwargs.popitem()
        # Use the per-instance index (the one __getitem__ uses) rather than the class-level
        # __colidx__, which does not exist on tables built directly with ``columns=``.
        idx = self.__col_index.get(colname)
        if idx is None:
            msg = "no '%s' column in %s" % (colname, self.__class__.__name__)
            raise KeyError(msg)
        return [i for i, row in enumerate(self.data) if row[idx] == value]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, args):
        """Select from the table.

        - ``table[(col, idx)]``: the value in column ``col`` (name or integer position)
          of row ``idx``
        - ``table[name]``: a list with the values of column ``name`` for every row
        - anything else is used to index the underlying data directly

        Raises KeyError for an unknown column name or an unsupported column selector.
        """
        idx = args
        col = None
        if isinstance(args, tuple):
            idx = args[1]
            if isinstance(args[0], str):
                col = self.__col_index.get(args[0])
                if col is None:
                    # fail like the plain column-name lookup does, instead of
                    # indexing the row with None
                    raise KeyError(args[0])
            elif isinstance(args[0], int):
                col = args[0]
            else:
                raise KeyError('first argument must be a column name or index')
            return self.data[idx][col]
        elif isinstance(args, str):
            col = self.__col_index.get(args)
            if col is None:
                raise KeyError(args)
            return [row[col] for row in self.data]
        else:
            return self.data[idx]

    def to_dataframe(self):
        '''Produce a pandas DataFrame containing this table's data.
        '''

        data = {colname: self[colname] for colname in self.columns}
        return pd.DataFrame(data)

    @classmethod
    @docval(
        {'name': 'df', 'type': pd.DataFrame, 'doc': 'input data'},
        {'name': 'name', 'type': str, 'doc': 'the name of this container', 'default': None},
        {
            'name': 'extra_ok',
            'type': bool,
            'doc': 'accept (and ignore) unexpected columns on the input dataframe',
            'default': False
        },
    )
    def from_dataframe(cls, **kwargs):
        '''Construct an instance of Table (or a subclass) from a pandas DataFrame. The columns of the dataframe
        should match the columns defined on the Table subclass.

        A single missing column is taken from the dataframe's index when the index carries
        that column's name. Extra dataframe columns raise ValueError unless ``extra_ok`` is
        True, in which case they are ignored. Any other missing column raises ValueError.
        '''

        df, name, extra_ok = getargs('df', 'name', 'extra_ok', kwargs)

        cls_cols = [col['name'] for col in getattr(cls, '__columns__')]
        df_cols = list(df.columns)

        missing_columns = set(cls_cols) - set(df_cols)
        extra_columns = set(df_cols) - set(cls_cols)

        # extra columns are only an error when the caller has not opted in to ignoring them
        if extra_columns and not extra_ok:
            raise ValueError(
                'unrecognized column(s) {} for table class {} (columns {})'.format(
                    extra_columns, cls.__name__, cls_cols
                )
            )

        use_index = False
        if len(missing_columns) == 1 and list(missing_columns)[0] == df.index.name:
            use_index = True

        elif missing_columns:
            raise ValueError(
                'missing column(s) {} for table class {} (columns {}, provided {})'.format(
                    missing_columns, cls.__name__, cls_cols, df_cols
                )
            )

        # rows are built from cls_cols only, so extra dataframe columns never reach the table;
        # every row is a tuple, matching what add_row stores
        data = []
        for index, row in df.iterrows():
            if use_index:
                data.append(tuple(
                    row[colname] if colname != df.index.name else index
                    for colname in cls_cols
                ))
            else:
                data.append(tuple(row[colname] for colname in cls_cols))

        if name is None:
            return cls(data=data)
        return cls(name=name, data=data)