Coverage for src/hdmf/container.py: 94%
840 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-07-25 05:02 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-07-25 05:02 +0000
1import types
2from abc import abstractmethod
3from collections import OrderedDict
4from copy import deepcopy
5from uuid import uuid4
6from warnings import warn
8import h5py
9import numpy as np
10import pandas as pd
12from .data_utils import DataIO, append_data, extend_data
13from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict
14from hdmf.term_set import TermSet
17def _set_exp(cls):
18 """Set a class as being experimental"""
19 cls._experimental = True
22def _exp_warn_msg(cls):
23 """Generate a warning message experimental features"""
24 pfx = cls
25 if isinstance(cls, type): 25 ↛ 27line 25 didn't jump to line 27, because the condition on line 25 was never false
26 pfx = cls.__name__
27 msg = ('%s is experimental -- it may be removed in the future and '
28 'is not guaranteed to maintain backward compatibility') % pfx
29 return msg
class ExternalResourcesManager:
    """
    Mixin that lets a subclass keep a reference to an ExternalResources instance.
    """

    @docval({'name': 'external_resources', 'type': 'ExternalResources',
             'doc': 'The external resources to be used for the container.'},)
    def link_resources(self, **kwargs):
        """
        Attach an ExternalResources instance to this object by storing it on
        ``self._external_resources``.
        """
        resources = kwargs['external_resources']
        self._external_resources = resources

    def get_linked_resources(self):
        """Return the attached ExternalResources instance, or None if none has been linked."""
        return getattr(self, "_external_resources", None)
class AbstractContainer(metaclass=ExtenderMeta):
    """
    Base class that gives an object a name, an object ID, a parent/children relationship,
    a "modified" flag, an optional read IO handle, and properties that are generated
    automatically from the class attribute named by ``_fieldsname`` (``__fields__``).
    """

    # When True, __new__ emits the warning built by _exp_warn_msg on every instantiation
    _experimental = False

    # The name of the class attribute that subclasses use to autogenerate properties
    # This parameterization is supplied in case users would like to configure
    # the class attribute name to something domain-specific
    _fieldsname = '__fields__'

    # Name of the attribute that get_ancestor reads on each parent and compares to the requested data_type
    _data_type_attr = 'data_type'

    # Subclasses use this class attribute to add properties to autogenerate
    # Autogenerated properties will store values in self.__field_values
    __fields__ = tuple()

    # This field is automatically set by __gather_fields before initialization.
    # It holds all the values in __fields__ for this class and its parent classes.
    __fieldsconf = tuple()

    # Keys accepted in a dict-style entry of __fields__ (enforced by _check_field_spec_keys)
    _pconf_allowed_keys = {'name', 'doc', 'settable'}

    # Override the _setter factor function, so directives that apply to
    # Container do not get used on Data
    @classmethod
    def _setter(cls, field):
        """
        Make a setter function for creating a :py:func:`property`

        :param field: field configuration dict; must contain 'name', may contain 'settable'
        :returns: the setter function, or None if the field has ``'settable': False``
        """
        name = field['name']

        if not field.get('settable', True):
            return None

        def setter(self, val):
            # setting None is a no-op, so a field can still be set later
            if val is None:
                return
            # a field may be set only once
            if name in self.fields:
                msg = "can't set attribute '%s' -- already set" % name
                raise AttributeError(msg)
            self.fields[name] = val

        return setter

    @classmethod
    def _getter(cls, field):
        """
        Make a getter function for creating a :py:func:`property`

        :param field: field configuration dict; must contain 'name', may contain 'doc'
        :returns: a function that returns ``self.fields.get(name)`` (None when the field is unset)
        """
        doc = field.get('doc')
        name = field['name']

        def getter(self):
            return self.fields.get(name)

        # the property built from this getter picks up its docstring from the getter
        setattr(getter, '__doc__', doc)
        return getter

    @staticmethod
    def _check_field_spec(field):
        """
        A helper function for __gather_fields to make sure we are always working
        with a dict specification and that the specification contains the correct keys

        :param field: an entry of ``__fields__``; either a dict with a 'name' key or a bare field name
        :returns: the dict itself, or ``{'name': field}`` when a non-dict was given
        :raises ValueError: if a dict is given without a 'name' key
        """
        tmp = field
        if isinstance(tmp, dict):
            if 'name' not in tmp:
                raise ValueError("must specify 'name' if using dict in __fields__")
        else:
            tmp = {'name': tmp}
        return tmp

    @classmethod
    def _check_field_spec_keys(cls, field_conf):
        """
        Verify that every key of ``field_conf`` is listed in ``cls._pconf_allowed_keys``.

        :raises ValueError: on the first unrecognized key
        """
        for k in field_conf:
            if k not in cls._pconf_allowed_keys:
                msg = ("Unrecognized key '%s' in %s config '%s' on %s"
                       % (k, cls._fieldsname, field_conf['name'], cls.__name__))
                raise ValueError(msg)

    @classmethod
    def _get_fields(cls):
        """Return the value of the class attribute named by ``cls._fieldsname``."""
        return getattr(cls, cls._fieldsname)

    @classmethod
    def _set_fields(cls, value):
        """Set the class attribute named by ``cls._fieldsname`` to ``value``."""
        return setattr(cls, cls._fieldsname, value)

    @classmethod
    def get_fields_conf(cls):
        """Return the tuple of field configuration dicts stored by ``__gather_fields``."""
        return cls.__fieldsconf

    @ExtenderMeta.pre_init
    def __gather_fields(cls, name, bases, classdict):
        '''
        This classmethod will be called during class declaration in the metaclass to automatically
        create setters and getters for fields that need to be exported

        After it runs, the fields attribute of ``cls`` holds the tuple of all field names
        (base-class fields first) and the matching configuration dicts are available through
        ``get_fields_conf``.

        :raises TypeError: if the fields attribute is not a tuple
        '''
        fields = cls._get_fields()
        if not isinstance(fields, tuple):
            msg = "'%s' must be of type tuple" % cls._fieldsname
            raise TypeError(msg)

        # check field specs and create map from field name to field conf dictionary
        fields_dict = OrderedDict()
        for f in fields:
            pconf = cls._check_field_spec(f)
            cls._check_field_spec_keys(pconf)
            fields_dict[pconf['name']] = pconf
        all_fields_conf = list(fields_dict.values())

        # check whether this class overrides __fields__
        if len(bases):
            # find highest base class that is an AbstractContainer (parent is higher than children)
            # NOTE: if no base matches, base_cls is left as the last base visited (i.e. bases[0])
            base_cls = None
            for base_cls in reversed(bases):
                if issubclass(base_cls, AbstractContainer):
                    break

            base_fields = base_cls._get_fields()  # tuple of field names from base class
            # identity check: an inherited (not redefined) fields attribute is the very same tuple object
            if base_fields is not fields:
                # check whether new fields spec already exists in base class
                fields_to_remove_from_base = list()
                for field_name in fields_dict:
                    if field_name in base_fields:
                        fields_to_remove_from_base.append(field_name)
                # prepend field specs from base class to fields list of this class
                # but only field specs that are not redefined in this class
                base_fields_conf = base_cls.get_fields_conf()  # tuple of fields configurations from base class
                base_fields_conf_to_add = list()
                for pconf in base_fields_conf:
                    if pconf['name'] not in fields_to_remove_from_base:
                        base_fields_conf_to_add.append(pconf)
                all_fields_conf[0:0] = base_fields_conf_to_add

        # create getter and setter if attribute does not already exist
        # if 'doc' not specified in __fields__, use doc from docval of __init__
        docs = {dv['name']: dv['doc'] for dv in get_docval(cls.__init__)}
        for field_conf in all_fields_conf:
            pname = field_conf['name']
            field_conf.setdefault('doc', docs.get(pname))
            if not hasattr(cls, pname):
                setattr(cls, pname, property(cls._getter(field_conf), cls._setter(field_conf)))

        cls._set_fields(tuple(field_conf['name'] for field_conf in all_fields_conf))
        cls.__fieldsconf = tuple(all_fields_conf)

    def __del__(self):
        # Make sure the reference counter for our read IO is being decremented
        try:
            del self.__read_io
            self.__read_io = None
        except AttributeError:
            # __read_io was never assigned (e.g. __init__ did not run), nothing to release
            pass

    def __new__(cls, *args, **kwargs):
        """
        Static method of the object class called by Python to create the object first and then
        __init__() is called to initialize the object's attributes.

        NOTE: this method is called directly from ObjectMapper.__new_container__ during the process of
        constructing the object from builders that are read from a file.

        The keyword arguments 'container_source', 'object_id', 'in_construct_mode' and 'parent'
        are consumed here; positional arguments are ignored.
        """
        inst = super().__new__(cls)
        if cls._experimental:
            warn(_exp_warn_msg(cls))
        inst.__container_source = kwargs.pop('container_source', None)
        inst.__parent = None
        inst.__children = list()
        inst.__modified = True
        inst.__object_id = kwargs.pop('object_id', str(uuid4()))
        # this variable is being passed in from ObjectMapper.__new_container__ and is
        # reset to False in that method after the object has been initialized by __init__
        inst._in_construct_mode = kwargs.pop('in_construct_mode', False)
        # goes through the parent property setter, so it must come after __parent/__children exist
        inst.parent = kwargs.pop('parent', None)
        return inst

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'})
    def __init__(self, **kwargs):
        """
        Store the container name and create the empty field-value dict.

        :raises ValueError: if ``name`` contains '/'
        """
        name = getargs('name', kwargs)
        if '/' in name:
            raise ValueError("name '" + name + "' cannot contain '/'")
        self.__name = name
        self.__field_values = dict()
        self.__read_io = None

    @property
    def read_io(self):
        """
        The :class:`~hdmf.backends.io.HDMFIO` object used for reading the container.

        This property will typically be None if this Container is not a root Container
        (i.e., if `parent` is not None). Use `get_read_io` instead if you want to retrieve the
        :class:`~hdmf.backends.io.HDMFIO` object used for reading from the parent container.
        """
        return self.__read_io

    @read_io.setter
    def read_io(self, value):
        """
        Set the io object used to read this container

        :param value: The :class:`~hdmf.backends.io.HDMFIO` object to use
        :raises ValueError: If io has already been set. We can't change the IO for a container.
        :raises TypeError: If value is not an instance of :class:`~hdmf.backends.io.HDMFIO`
        """
        # We do not want to import HDMFIO on the module level to avoid circular imports. Since we only need
        # it for type checking we import it here.
        from hdmf.backends.io import HDMFIO
        if not isinstance(value, HDMFIO):
            raise TypeError("io must be an instance of HDMFIO")
        # re-assigning the very same io object is allowed; replacing it with a different one is not
        if self.__read_io is not None and self.__read_io is not value:
            raise ValueError("io has already been set for this container (name=%s, type=%s)" %
                             (self.name, str(type(self))))
        else:
            self.__read_io = value

    def get_read_io(self):
        """
        Get the io object used to read this container.

        If `self.read_io` is None, this function will iterate through the parents and return the
        first `io` object found on a parent container

        :returns: The :class:`~hdmf.backends.io.HDMFIO` object used to read this container.
                  Returns None in case no io object is found, e.g., in case this container has
                  not been read from file.
        """
        curr_obj = self
        re_io = self.read_io
        while re_io is None and curr_obj.parent is not None:
            curr_obj = curr_obj.parent
            re_io = curr_obj.read_io
        return re_io

    @property
    def name(self):
        '''
        The name of this Container
        '''
        return self.__name

    @docval({'name': 'data_type', 'type': str, 'doc': 'the data_type to search for', 'default': None})
    def get_ancestor(self, **kwargs):
        """
        Traverse parent hierarchy and return first instance of the specified data_type

        If ``data_type`` is None the direct parent is returned. Returns None when no ancestor matches.
        """
        data_type = getargs('data_type', kwargs)
        if data_type is None:
            return self.parent
        p = self.parent
        while p is not None:
            # each ancestor names the attribute holding its data type via _data_type_attr
            if getattr(p, p._data_type_attr) == data_type:
                return p
            p = p.parent
        return None

    @property
    def fields(self):
        '''
        The dict that stores the values of this container's fields, keyed by field name.
        The autogenerated property getters read from this dict and the setters write to it.

        The fields themselves are declared on the class through the ``__fields__`` attribute, whose
        entries are either field names or dicts. The dict keys a class accepts are listed in its
        ``_pconf_allowed_keys``:
        1. name: The name of the field property
        2. child: A boolean value to set the parent/child relationship between the field property and the container.
        3. required_name: The name the field property must have such that `name` matches `required_name`.
        4. doc: Documentation of the field property
        5. settable: If true, a setter function is created so that the field can be changed after creation.
        '''
        return self.__field_values

    @property
    def object_id(self):
        """The object ID of this container; a new UUID string is generated if the stored ID is None."""
        if self.__object_id is None:
            self.__object_id = str(uuid4())
        return self.__object_id

    @docval({'name': 'recurse', 'type': bool,
             'doc': "whether or not to change the object ID of this container's children", 'default': True})
    def generate_new_id(self, **kwargs):
        """Changes the object ID of this Container and all of its children to a new UUID string."""
        recurse = getargs('recurse', kwargs)
        self.__object_id = str(uuid4())
        self.set_modified()
        if recurse:
            for c in self.children:
                c.generate_new_id(**kwargs)

    @property
    def modified(self):
        """Whether this container is currently flagged as modified."""
        return self.__modified

    @docval({'name': 'modified', 'type': bool,
             'doc': 'whether or not this Container has been modified', 'default': True})
    def set_modified(self, **kwargs):
        """Set the modified flag; setting it to True also flags a parent that is a Container."""
        modified = getargs('modified', kwargs)
        self.__modified = modified
        if modified and isinstance(self.parent, Container):
            self.parent.set_modified()

    @property
    def children(self):
        """A tuple snapshot of the child containers of this container."""
        return tuple(self.__children)

    @docval({'name': 'child', 'type': 'Container',
             'doc': 'the child Container for this Container', 'default': None})
    def add_child(self, **kwargs):
        """Deprecated: set ``child.parent`` instead. Emits a DeprecationWarning on every call."""
        warn(DeprecationWarning('add_child is deprecated. Set the parent attribute instead.'))
        child = getargs('child', kwargs)
        if child is not None:
            # if child.parent is a Container, then the mismatch between child.parent and parent
            # is used to make a soft/external link from the parent to a child elsewhere
            # if child.parent is not a Container, it is either None or a Proxy and should be set to self
            if not isinstance(child.parent, AbstractContainer):
                # actually add the child to the parent in parent setter
                child.parent = self
        else:
            warn('Cannot add None as child to a container %s' % self.name)

    @classmethod
    def type_hierarchy(cls):
        """Return the method resolution order (``__mro__``) of this class."""
        return cls.__mro__

    @property
    def container_source(self):
        '''
        The source of this Container
        '''
        return self.__container_source

    @container_source.setter
    def container_source(self, source):
        # the source can be assigned only while it is still None
        if self.__container_source is not None:
            raise Exception('cannot reassign container_source')
        self.__container_source = source

    @property
    def parent(self):
        '''
        The parent Container of this Container
        '''
        # do it this way because __parent may not exist yet (not set in constructor)
        return getattr(self, '_AbstractContainer__parent', None)

    @parent.setter
    def parent(self, parent_container):
        # Set the parent of this container.
        # - assigning the current parent again is a no-op
        # - an AbstractContainer parent cannot be replaced (ValueError)
        # - any other non-None parent is treated as a Proxy: it is replaced when it matches
        #   parent_container, otherwise parent_container is recorded as a candidate
        # - with no parent yet, parent_container is stored and, if it is a Container,
        #   this object is appended to its children
        if self.parent is parent_container:
            return

        if self.parent is not None:
            if isinstance(self.parent, AbstractContainer):
                raise ValueError(('Cannot reassign parent to Container: %s. '
                                  'Parent is already: %s.' % (repr(self), repr(self.parent))))
            else:
                if parent_container is None:
                    raise ValueError("Got None for parent of '%s' - cannot overwrite Proxy with NoneType" % repr(self))
                # NOTE this assumes isinstance(parent_container, Proxy) but we get a circular import
                # if we try to do that
                if self.parent.matches(parent_container):
                    self.__parent = parent_container
                    parent_container.__children.append(self)
                    parent_container.set_modified()
                else:
                    self.__parent.add_candidate(parent_container)
        else:
            self.__parent = parent_container
            if isinstance(parent_container, Container):
                parent_container.__children.append(self)
                parent_container.set_modified()
        # matched by class name rather than isinstance
        # (presumably to avoid importing DynamicTableRegion here -- TODO confirm)
        for child in self.children:
            if type(child).__name__ == "DynamicTableRegion":
                if child.table.parent is None:
                    msg = "The table for this DynamicTableRegion has not been added to the parent."
                    warn(msg)
                else:
                    continue

    def _remove_child(self, child):
        """Remove a child Container. Intended for use in subclasses that allow dynamic addition of child Containers.

        Both the child and this container are flagged as modified afterwards.

        :raises ValueError: if ``child`` is not an AbstractContainer or is not a child of this container
        """
        if not isinstance(child, AbstractContainer):
            raise ValueError('Cannot remove non-AbstractContainer object from children.')
        if child not in self.children:
            raise ValueError("%s '%s' is not a child of %s '%s'." % (child.__class__.__name__, child.name,
                                                                     self.__class__.__name__, self.name))
        # write the private attribute directly; the parent property setter would refuse to replace a container parent
        child.__parent = None
        self.__children.remove(child)
        child.set_modified()
        self.set_modified()

    def reset_parent(self):
        """Reset the parent of this Container to None and remove the Container from the children of its parent.

        Use with caution. This can result in orphaned containers and broken links.

        :raises ValueError: if the current parent is set but is not an AbstractContainer
        """
        if self.parent is None:
            return
        elif isinstance(self.parent, AbstractContainer):
            self.parent._remove_child(self)
        else:
            raise ValueError("Cannot reset parent when parent is not an AbstractContainer: %s" % repr(self.parent))
class Container(AbstractContainer):
    """A container that can contain other containers and has special functionality for printing."""

    _pconf_allowed_keys = {'name', 'child', 'required_name', 'doc', 'settable'}

    @classmethod
    def _setter(cls, field):
        """
        Build the setter function for the property generated from ``field``.

        The base setter from :class:`AbstractContainer` is wrapped, in this order, by
        a check on the value's name (when ``field['required_name']`` is given) and by
        a step that makes ``self`` the parent of the assigned value(s) (when ``field['child']`` is true).

        :param field: field configuration dict
        :returns: the outermost setter, or None when the field is not settable
        """
        base_setter = AbstractContainer._setter(field)
        if base_setter is None:
            # Not settable: return no setter so the property is read-only. Wrapping None would
            # produce a setter that fails with "TypeError: 'NoneType' object is not callable".
            return None

        setter = base_setter

        # create setter with check for required name
        # the AbstractContainer that is passed to the setter must have name = required_name
        required_name = field.get('required_name', None)
        if required_name is not None:
            wrapped_by_name_check = setter

            def required_name_setter(self, val):
                if val is not None:
                    if not isinstance(val, AbstractContainer):
                        msg = ("Field '%s' on %s has a required name and must be a subclass of AbstractContainer."
                               % (field['name'], self.__class__.__name__))
                        raise ValueError(msg)
                    if val.name != required_name:
                        msg = ("Field '%s' on %s must be named '%s'."
                               % (field['name'], self.__class__.__name__, required_name))
                        raise ValueError(msg)
                wrapped_by_name_check(self, val)  # call the previous setter

            setter = required_name_setter

        # create setter that accepts a value or tuple, list, or dict or values and sets the value's parent to self
        if field.get('child', False):
            wrapped_by_child_step = setter

            def child_setter(self, val):
                wrapped_by_child_step(self, val)  # call the previous setter
                if val is None:
                    return
                if isinstance(val, (tuple, list)):
                    children = val
                elif isinstance(val, dict):
                    children = val.values()
                else:
                    children = [val]
                for v in children:
                    if not isinstance(v.parent, Container):
                        v.parent = self
                    else:
                        # the ObjectMapper will create a link from self (parent) to v (child with existing parent)
                        # still need to mark self as modified
                        self.set_modified()

            setter = child_setter

        return setter  # the last setter calls the previous setters, if applicable

    def __repr__(self):
        """
        Return a text summary: a header line followed by one line per non-empty field,
        with fields sorted by name.
        """
        cls = self.__class__
        # NOTE(review): "0x%d" prints the decimal id after a hex prefix; left unchanged because
        # the exact text may be relied upon elsewhere -- confirm before switching to %x
        template = "%s %s.%s at 0x%d" % (self.name, cls.__module__, cls.__name__, id(self))
        if len(self.fields):
            template += "\nFields:\n"
        for k in sorted(self.fields):  # sorted to enable tests
            v = self.fields[k]
            if not hasattr(v, '__len__'):
                template += " {}: {}\n".format(k, v)
                continue
            # sized values are shown only when non-empty / truthy
            if isinstance(v, (np.ndarray, list, tuple)):
                show = len(v) > 0
            else:
                show = bool(v)
            if show:
                template += " {}: {}\n".format(k, self.__smart_str(v, 1))
        return template

    @staticmethod
    def __html_text(value):
        """Format ``value`` as an f-string would and escape ``&``, ``<`` and ``>`` for use as HTML text."""
        from html import escape  # local import: only needed for the HTML representation
        return escape(f"{value}", quote=False)

    @staticmethod
    def __html_attr(value):
        """
        Escape ``value`` for use inside a double-quoted HTML attribute.

        Single quotes are deliberately left alone so that access codes such as
        ``.fields['key']`` are emitted unchanged.
        """
        return Container.__html_text(value).replace('"', '&quot;')

    def _repr_html_(self):
        """
        Return an HTML representation of this container: a style block, a script block,
        a header with the container name (and class name when different) and the
        collapsible tree produced by ``_generate_html_repr``.
        """
        CSS_STYLE = """
        <style>
            .container-fields {
                font-family: "Open Sans", Arial, sans-serif;
            }
            .container-fields .field-value {
                color: #00788E;
            }
            .container-fields details > summary {
                cursor: pointer;
                display: list-item;
            }
            .container-fields details > summary:hover {
                color: #0A6EAA;
            }
        </style>
        """

        JS_SCRIPT = """
        <script>
            function copyToClipboard(text) {
                navigator.clipboard.writeText(text).then(function() {
                    console.log('Copied to clipboard: ' + text);
                }, function(err) {
                    console.error('Could not copy text: ', err);
                });
            }

            document.addEventListener('DOMContentLoaded', function() {
                let fieldKeys = document.querySelectorAll('.container-fields .field-key');
                fieldKeys.forEach(function(fieldKey) {
                    fieldKey.addEventListener('click', function() {
                        let accessCode = fieldKey.getAttribute('title').replace('Access code: ', '');
                        copyToClipboard(accessCode);
                    });
                });
            });
        </script>
        """
        if self.name == self.__class__.__name__:
            header_text = self.name
        else:
            header_text = f"{self.name} ({self.__class__.__name__})"
        # names and field values can come from files, so they are escaped before being embedded
        header_text = Container.__html_text(header_text)
        html_repr = CSS_STYLE
        html_repr += JS_SCRIPT
        html_repr += "<div class='container-wrap'>"
        html_repr += (
            f"<div class='container-header'><div class='xr-obj-type'><h3>{header_text}</h3></div></div>"
        )
        html_repr += self._generate_html_repr(self.fields)
        html_repr += "</div>"
        return html_repr

    def _generate_html_repr(self, fields, level=0, access_code=".fields"):
        """
        Recursively render ``fields`` as HTML.

        :param fields: a dict (rendered key by key), a list (one line per item) or a numpy array
                       (rendered via ``str``); any other type renders as an empty string
        :param level: nesting depth, used for the left margin (20px per level)
        :param access_code: expression text used to reach ``fields``, placed in the ``title`` attributes
        :returns: the HTML fragment as a string
        """
        text = Container.__html_text
        attr = Container.__html_attr
        html_repr = ""

        if isinstance(fields, dict):
            for key, value in fields.items():
                current_access_code = f"{access_code}['{key}']"
                if (
                    isinstance(value, (list, dict, np.ndarray))
                    or hasattr(value, "fields")
                ):
                    label = key
                    if isinstance(value, dict):
                        label += f" ({len(value)})"

                    html_repr += (
                        f'<details><summary style="display: list-item; margin-left: {level * 20}px;" '
                        f'class="container-fields field-key" title="{attr(current_access_code)}">'
                        f'<b>{text(label)}</b></summary>'
                    )
                    if hasattr(value, "fields"):
                        # descend into the nested object's own field dict
                        value = value.fields
                        current_access_code = current_access_code + ".fields"
                    html_repr += self._generate_html_repr(
                        value, level + 1, current_access_code
                    )
                    html_repr += "</details>"
                else:
                    html_repr += (
                        f'<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-key"'
                        f' title="{attr(current_access_code)}">{text(key)}:</span> '
                        f'<span class="field-value">{text(value)}</span></div>'
                    )
        elif isinstance(fields, list):
            for index, item in enumerate(fields):
                current_access_code = f"{access_code}[{index}]"
                html_repr += (
                    f'<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-value"'
                    f' title="{attr(current_access_code)}">{text(str(item))}</span></div>'
                )
        elif isinstance(fields, np.ndarray):
            # escape first, then insert the line-break markup
            str_ = text(str(fields)).replace("\n", "</br>")
            html_repr += (
                f'<div style="margin-left: {level * 20}px;" class="container-fields">{str_}</div>'
            )
        else:
            pass

        return html_repr

    @staticmethod
    def __smart_str(v, num_indent):
        """
        Print compact string representation of data.

        If v is a list or tuple, try to print it using numpy. This will condense the string
        representation of datasets with many elements. If that doesn't work, or if the first
        element is an AbstractContainer, print the elements one per line.

        If v is a dictionary, print each key followed by the type of its value

        If v is a set, print it sorted

        If v is an AbstractContainer, print its name and type

        Otherwise, use the built-in str()

        Parameters
        ----------
        v
            the value to format
        num_indent
            the current indentation level (two spaces per level)

        Returns
        -------
        str

        """

        if isinstance(v, (list, tuple)):
            if len(v) and isinstance(v[0], AbstractContainer):
                return Container.__smart_str_list(v, num_indent, '(')
            try:
                return str(np.asarray(v))
            except ValueError:
                return Container.__smart_str_list(v, num_indent, '(')
        elif isinstance(v, dict):
            return Container.__smart_str_dict(v, num_indent)
        elif isinstance(v, set):
            return Container.__smart_str_list(sorted(v), num_indent, '{')
        elif isinstance(v, AbstractContainer):
            return "{} {}".format(getattr(v, 'name'), type(v))
        else:
            return str(v)

    @staticmethod
    def __smart_str_list(str_list, num_indent, left_br):
        """
        Format the items of ``str_list`` one per line between ``left_br`` and its closing bracket.

        :param left_br: the opening bracket, '(' or '{'
        :raises ValueError: if ``left_br`` is not a supported opening bracket
        """
        closing = {'(': ')', '{': '}'}
        if left_br not in closing:
            # previously this surfaced later as an UnboundLocalError on right_br
            raise ValueError("Unsupported opening bracket '%s'" % left_br)
        right_br = closing[left_br]
        if len(str_list) == 0:
            return left_br + ' ' + right_br
        indent = num_indent * 2 * ' '
        indent_in = (num_indent + 1) * 2 * ' '
        lines = [indent_in + Container.__smart_str(v, num_indent + 1) for v in str_list]
        return left_br + '\n' + ',\n'.join(lines) + '\n' + indent + right_br

    @staticmethod
    def __smart_str_dict(d, num_indent):
        """Format ``d`` as one line per key (sorted), each showing the key and the type of its value."""
        left_br = '{'
        right_br = '}'
        if len(d) == 0:
            return left_br + ' ' + right_br
        indent = num_indent * 2 * ' '
        indent_in = (num_indent + 1) * 2 * ' '
        lines = [
            indent_in + Container.__smart_str(k, num_indent + 1) + ' ' + str(type(d[k]))
            for k in sorted(d.keys())
        ]
        return left_br + '\n' + ',\n'.join(lines) + '\n' + indent + right_br
class Data(AbstractContainer):
    """
    A class for representing dataset containers
    """

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'},
            {'name': 'data', 'type': ('scalar_data', 'array_data', 'data'), 'doc': 'the source of the data'},
            {'name': 'term_set', 'type': TermSet, 'doc': 'the set of terms used to validate data on add',
             'default': None})
    def __init__(self, **kwargs):
        """
        Store ``data`` on this container, validating every element against ``term_set`` when one is given.

        :raises ValueError: if a term set is given and one or more elements of ``data`` fail validation;
                            the message lists each failing element once
        """
        data = popargs('data', kwargs)
        self.term_set = popargs('term_set', kwargs)
        super().__init__(**kwargs)
        if self.term_set is not None:
            # single validation pass; a second pass used to append every invalid term again,
            # duplicating the entries in the error message
            bad_data = [term for term in data if not self.term_set.validate(term=term)]
            if bad_data:
                msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data]))
                raise ValueError(msg)
        self.__data = data

    @property
    def data(self):
        """The data held by this container."""
        return self.__data

    @property
    def shape(self):
        """
        Get the shape of the data represented by this container
        :return: Shape tuple
        :rtype: tuple of ints
        """
        return get_data_shape(self.__data)

    @docval({'name': 'dataio', 'type': DataIO, 'doc': 'the DataIO to apply to the data held by this Data'})
    def set_dataio(self, **kwargs):
        """
        Apply DataIO object to the data held by this Data object

        The current data is assigned to ``dataio.data`` and ``dataio`` then replaces it as the held data.
        """
        dataio = getargs('dataio', kwargs)
        dataio.data = self.__data
        self.__data = dataio

    @docval({'name': 'func', 'type': types.FunctionType, 'doc': 'a function to transform *data*'})
    def transform(self, **kwargs):
        """
        Transform data from the current underlying state.

        This function can be used to permanently load data from disk, or convert to a different
        representation, such as a torch.Tensor

        :returns: this object, with its data replaced by ``func(data)``
        """
        func = getargs('func', kwargs)
        self.__data = func(self.__data)
        return self

    def __bool__(self):
        """Return False when the data is None, an empty array/tuple/list, or otherwise falsy."""
        if self.data is not None:
            if isinstance(self.data, (np.ndarray, tuple, list)):
                # len() avoids the ambiguous truth value of multi-element numpy arrays
                return len(self.data) != 0
            if self.data:
                return True
        return False

    def __len__(self):
        """Return ``len()`` of the held data."""
        return len(self.__data)

    def __getitem__(self, args):
        """Index into the data; delegates to :meth:`get`."""
        return self.get(args)

    def get(self, args):
        """
        Return the element(s) of the data selected by ``args``.

        A tuple/list/array of indices applied to tuple or list data returns a list of the selected items.
        """
        if isinstance(self.data, (tuple, list)) and isinstance(args, (tuple, list, np.ndarray)):
            return [self.data[i] for i in args]
        if isinstance(self.data, h5py.Dataset) and isinstance(args, np.ndarray):
            # This is needed for h5py 2.9 compatibility
            args = args.tolist()
        return self.data[args]

    def append(self, arg):
        """
        Append ``arg`` to the data of this container.

        :raises ValueError: if a term set is attached and ``arg`` fails its validation
        """
        if self.term_set is not None and not self.term_set.validate(term=arg):
            msg = ('"%s" is not in the term set.' % arg)
            raise ValueError(msg)
        self.__data = append_data(self.__data, arg)

    def extend(self, arg):
        """
        Add all the elements of the iterable arg to the end of the data of this Data container.

        When a term set is attached, the items are appended one at a time. Items whose append
        raises ValueError are skipped and reported together at the end, so the valid items have
        already been added when the error is raised.

        :param arg: The iterable to add to the end of this Data container
        :raises ValueError: if a term set is attached and appending one or more items raised ValueError
        """
        if self.term_set is None:
            self.__data = extend_data(self.__data, arg)
        else:
            bad_data = []
            for item in arg:
                try:
                    self.append(item)
                except ValueError:
                    bad_data.append(item)
            if bad_data:
                msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data]))
                raise ValueError(msg)
class DataRegion(Data):

    @property
    @abstractmethod
    def data(self):
        """
        The target data that this region applies to
        """

    @property
    @abstractmethod
    def region(self):
        """
        The region that indexes into data e.g. slice or list of indices
        """
832class MultiContainerInterface(Container):
833 """Class that dynamically defines methods to support a Container holding multiple Containers of the same type.
835 To use, extend this class and create a dictionary as a class attribute with any of the following keys:
836 * 'attr' to name the attribute that stores the Container instances
837 * 'type' to provide the Container object type (type or list/tuple of types, type can be a docval macro)
838 * 'add' to name the method for adding Container instances
839 * 'get' to name the method for getting Container instances
840 * 'create' to name the method for creating Container instances (only if a single type is specified)
842 If the attribute does not exist in the class, it will be generated. If it does exist, it should behave like a dict.
844 The keys 'attr', 'type', and 'add' are required.
845 """
847 def __new__(cls, *args, **kwargs):
848 if cls is MultiContainerInterface:
849 raise TypeError("Can't instantiate class MultiContainerInterface.")
850 if not hasattr(cls, '__clsconf__'):
851 raise TypeError("MultiContainerInterface subclass %s is missing __clsconf__ attribute. Please check that "
852 "the class is properly defined." % cls.__name__)
853 return super().__new__(cls, *args, **kwargs)
855 @staticmethod
856 def __add_article(noun):
857 if isinstance(noun, tuple):
858 noun = noun[0]
859 if isinstance(noun, type): 859 ↛ 861line 859 didn't jump to line 861, because the condition on line 859 was never false
860 noun = noun.__name__
861 if noun[0] in ('aeiouAEIOU'):
862 return 'an %s' % noun
863 return 'a %s' % noun
865 @staticmethod
866 def __join(argtype):
867 """Return a grammatical string representation of a list or tuple of classes or text.
869 Examples:
870 cls.__join(Container) returns "Container"
871 cls.__join((Container, )) returns "Container"
872 cls.__join((Container, Data)) returns "Container or Data"
873 cls.__join((Container, Data, Subcontainer)) returns "Container, Data, or Subcontainer"
874 """
876 def tostr(x):
877 return x.__name__ if isinstance(x, type) else x
879 if isinstance(argtype, (list, tuple)):
880 args_str = [tostr(x) for x in argtype]
881 if len(args_str) == 1:
882 return args_str[0]
883 if len(args_str) == 2:
884 return " or ".join(tostr(x) for x in args_str)
885 else:
886 return ", ".join(tostr(x) for x in args_str[:-1]) + ', or ' + args_str[-1]
887 else:
888 return tostr(argtype)
@classmethod
def __make_get(cls, func_name, attr_name, container_type):
    """Build the named getter that looks up one item in ``attr_name``.

    The returned function takes an optional ``name``. Without a name it
    returns the only stored item, and raises ValueError when the mapping is
    empty or holds more than one item. A lookup that yields ``None`` raises
    KeyError.
    """
    doc = "Get %s from this %s" % (cls.__add_article(container_type), cls.__name__)
    type_str = cls.__join(container_type)
    name_arg = {'name': 'name', 'type': str, 'doc': 'the name of the %s' % type_str,
                'default': None}

    @docval(name_arg,
            rtype=container_type, returns='the %s with the given name' % type_str,
            func_name=func_name, doc=doc)
    def _func(self, **kwargs):
        name = getargs('name', kwargs)
        items = getattr(self, attr_name)
        if name is not None:
            found = items.get(name)
        elif len(items) > 1:
            raise ValueError("More than one element in %s of %s '%s' -- must specify a name."
                             % (attr_name, cls.__name__, self.name))
        elif len(items) == 0:
            raise ValueError("%s of %s '%s' is empty." % (attr_name, cls.__name__, self.name))
        else:
            # exactly one item stored: return it without needing its name
            found = next(iter(items.values()))
        if found is None:
            raise KeyError("'%s' not found in %s of %s '%s'." % (name, attr_name, cls.__name__, self.name))
        return found

    return _func
@classmethod
def __make_getitem(cls, attr_name, container_type):
    """Build the ``__getitem__`` method that looks up one item in ``attr_name``.

    Same lookup rules as the named getter, with error messages that refer to
    the container rather than to the attribute.
    """
    doc = "Get %s from this %s" % (cls.__add_article(container_type), cls.__name__)
    type_str = cls.__join(container_type)
    name_arg = {'name': 'name', 'type': str, 'doc': 'the name of the %s' % type_str,
                'default': None}

    @docval(name_arg,
            rtype=container_type, returns='the %s with the given name' % type_str,
            func_name='__getitem__', doc=doc)
    def _func(self, **kwargs):
        name = getargs('name', kwargs)
        items = getattr(self, attr_name)
        if name is not None:
            found = items.get(name)
        elif len(items) > 1:
            raise ValueError("More than one %s in %s '%s' -- must specify a name."
                             % (cls.__join(container_type), cls.__name__, self.name))
        elif len(items) == 0:
            raise ValueError("%s '%s' is empty." % (cls.__name__, self.name))
        else:
            # exactly one item stored: return it without needing its name
            found = next(iter(items.values()))
        if found is None:
            raise KeyError("'%s' not found in %s '%s'." % (name, cls.__name__, self.name))
        return found

    return _func
@classmethod
def __make_add(cls, func_name, attr_name, container_type):
    """Build the ``add`` method that stores containers in ``attr_name``.

    The returned function accepts a single container, or a list, tuple or
    dict of containers, and stores each one under its ``name``. It returns
    the argument it was given.

    Raises (from the returned function):
        ValueError: if a container's name is already present. Items earlier
            in the same call have already been added at that point.
    """
    doc = "Add one or multiple %s objects to this %s" % (cls.__join(container_type), cls.__name__)

    @docval({'name': attr_name, 'type': (list, tuple, dict, container_type),
             'doc': 'one or multiple %s objects to add to this %s' % (cls.__join(container_type), cls.__name__)},
            func_name=func_name, doc=doc)
    def _func(self, **kwargs):
        container = getargs(attr_name, kwargs)
        if isinstance(container, container_type):
            containers = [container]
        elif isinstance(container, dict):
            containers = container.values()
        else:
            containers = container
        d = getattr(self, attr_name)
        for tmp in containers:
            # Check for a name clash BEFORE touching tmp.parent or the modified flag,
            # so that a rejected container is not left attached to this object.
            if tmp.name in d:
                msg = "'%s' already exists in %s '%s'" % (tmp.name, cls.__name__, self.name)
                raise ValueError(msg)
            if not isinstance(tmp.parent, Container):
                tmp.parent = self
            else:
                # the ObjectMapper will create a link from self (parent) to tmp (child with existing parent)
                # still need to mark self as modified
                self.set_modified()
            d[tmp.name] = tmp
        return container

    return _func
@classmethod
def __make_create(cls, func_name, add_name, container_type):
    """Build the ``create`` method: construct a ``container_type`` and add it.

    The returned function reuses the docval arguments of
    ``container_type.__init__``, passes its keyword arguments to
    ``container_type``, hands the new object to the method named
    ``add_name``, and returns the new object.
    """
    doc = "Create %s object and add it to this %s" % (cls.__add_article(container_type), cls.__name__)
    init_args = get_docval(container_type.__init__)
    returns = "the %s object that was created" % cls.__join(container_type)

    @docval(*init_args, func_name=func_name, doc=doc, returns=returns, rtype=container_type)
    def _func(self, **kwargs):
        created = container_type(**kwargs)
        add_method = getattr(self, add_name)
        add_method(created)
        return created

    return _func
@classmethod
def __make_constructor(cls, clsconf):
    """Build a default ``__init__`` from a list of ``__clsconf__`` dicts.

    The generated constructor takes one optional argument per configured
    attribute (default: an empty dict) plus ``name`` (default: the class
    name). It passes each attribute argument to that configuration's add
    method.
    """
    args = [
        {'name': conf['attr'],
         'type': (list, tuple, dict, conf['type']),
         'doc': '%s to store in this interface' % cls.__join(conf['type']),
         'default': dict()}
        for conf in clsconf
    ]
    args.append({'name': 'name', 'type': str, 'doc': 'the name of this container', 'default': cls.__name__})

    @docval(*args, func_name='__init__')
    def _func(self, **kwargs):
        super().__init__(name=kwargs['name'])
        for conf in clsconf:
            attr_name, add_name = conf['attr'], conf['add']
            value = popargs(attr_name, kwargs)
            getattr(self, add_name)(value)

    return _func
@classmethod
def __make_getter(cls, attr):
    """Make a getter function for creating a :py:func:`property`"""

    def _func(self):
        if attr in self.fields:
            return self.fields.get(attr)

        # The field has not been initialized yet: create an empty LabelledDict for it.
        # This is done here rather than in a default __init__, which may or may not be
        # overridden in custom classes and dynamically generated classes.
        def _remove_child(child):
            # only detach children that actually belong to this container
            if child.parent is self:
                self._remove_child(child)

        self.fields[attr] = LabelledDict(attr, remove_callable=_remove_child)
        return self.fields.get(attr)

    return _func
@classmethod
def __make_setter(cls, add_name):
    """Make a setter function for creating a :py:func:`property`

    The setter passes the assigned value to the method named ``add_name``.
    Assigning ``None`` does nothing.
    """

    @docval({'name': 'val', 'type': (list, tuple, dict), 'doc': 'the sub items to add', 'default': None})
    def _func(self, **kwargs):
        val = getargs('val', kwargs)
        if val is not None:
            getattr(self, add_name)(val)

    return _func
@ExtenderMeta.pre_init
def __build_class(cls, name, bases, classdict):
    """Verify __clsconf__ and create methods based on __clsconf__.
    This method is called prior to __new__ and __init__ during class declaration in the metaclass.

    ``__clsconf__`` may be a single dict or a list of dicts; anything else raises TypeError.
    Classes without ``__clsconf__`` are left untouched.
    """
    if not hasattr(cls, '__clsconf__'):
        return

    raw_conf = cls.__clsconf__
    if isinstance(raw_conf, dict):
        multi, clsconf = False, [raw_conf]
    elif isinstance(raw_conf, list):
        multi, clsconf = True, raw_conf
    else:
        raise TypeError("'__clsconf__' for MultiContainerInterface subclass %s must be a dict or a list of "
                        "dicts." % cls.__name__)

    for conf_index, conf_dict in enumerate(clsconf):
        cls.__build_conf_methods(conf_dict, conf_index, multi)

    # square-bracket access is only generated when exactly one conf is defined
    if len(clsconf) == 1:
        only_conf = clsconf[0]
        setattr(cls, '__getitem__', cls.__make_getitem(only_conf.get('attr'), only_conf.get('type')))

    # generate a constructor unless this class defines its own __init__
    if '__init__' not in cls.__dict__:
        setattr(cls, '__init__', cls.__make_constructor(clsconf))
@classmethod
def __build_conf_methods(cls, conf_dict, conf_index, multi):
    """Create the property and methods described by one ``__clsconf__`` dict.

    Args:
        conf_dict: the configuration; 'add', 'attr' and 'type' are required,
            'create' and 'get' are optional.
        conf_index: position of ``conf_dict`` in ``__clsconf__``; only used in
            error messages.
        multi: whether ``__clsconf__`` is a list, in which case error messages
            include ``conf_index``.

    Raises:
        ValueError: if a required key is missing, or if 'create' is given
            while 'type' is not a single type.
    """

    def _fail(msg):
        # every configuration error carries the index when several confs exist
        if multi:
            msg += " at index %d" % conf_index
        raise ValueError(msg)

    # required: name of the add method
    add = conf_dict.get('add')
    if add is None:
        _fail("MultiContainerInterface subclass %s is missing 'add' key in __clsconf__" % cls.__name__)

    # required: name of the container attribute
    attr = conf_dict.get('attr')
    if attr is None:
        _fail("MultiContainerInterface subclass %s is missing 'attr' key in __clsconf__" % cls.__name__)

    # required: container type
    container_type = conf_dict.get('type')
    if container_type is None:
        _fail("MultiContainerInterface subclass %s is missing 'type' key in __clsconf__" % cls.__name__)

    # create the property named by 'attr' unless the class already has that attribute
    if not hasattr(cls, attr):
        doc = "a dictionary containing the %s in this %s" % (cls.__join(container_type), cls.__name__)
        prop = property(cls.__make_getter(attr), cls.__make_setter(add), None, doc)
        setattr(cls, attr, prop)

    # the add method is always created
    setattr(cls, add, cls.__make_add(add, attr, container_type))

    # the create method needs a single concrete type to instantiate
    create = conf_dict.get('create')
    if create is not None:
        if not isinstance(container_type, type):
            _fail(("Cannot specify 'create' key in __clsconf__ for MultiContainerInterface subclass %s "
                   "when 'type' key is not a single type") % cls.__name__)
        setattr(cls, create, cls.__make_create(create, add, container_type))

    # the get method is optional
    get = conf_dict.get('get')
    if get is not None:
        setattr(cls, get, cls.__make_get(get, attr, container_type))
# NOTE: the explicit ``object`` base is load-bearing: __build_row_class compares
# ``bases`` against ``(object,)`` to recognize this base class itself.
class Row(object, metaclass=ExtenderMeta):
    """
    A class for representing rows from a Table.

    The Table class can be indicated with the __table__. Doing so
    will set constructor arguments for the Row class and ensure that
    Row.idx is set appropriately when a Row is added to the Table. It will
    also add functionality to the Table class for getting Row objects.

    Note, the Row class is not needed for working with Table objects. This
    is merely convenience functionality for working with Tables.
    """

    __table__ = None

    @property
    def idx(self):
        """The index of this row in its respective Table"""
        return self.__idx

    @idx.setter
    def idx(self, val):
        # the index may be assigned only while it is still unset
        if self.__idx is None:
            self.__idx = val
        else:
            raise ValueError("cannot reset the ID of a row object")

    @property
    def table(self):
        """The Table this Row comes from"""
        return self.__table

    @table.setter
    def table(self, val):
        # Assigning None leaves the row detached. Previously the index check below
        # ran even without a table and failed with AttributeError on None.add_row.
        if val is None:
            return
        self.__table = val
        if self.idx is None:
            # the row is not in the table yet: append it and remember its position
            self.idx = self.__table.add_row(**self.todict())

    @ExtenderMeta.pre_init
    def __build_row_class(cls, name, bases, classdict):
        """Generate ``__init__`` and ``todict`` for a Row subclass from its table's columns.

        Raises ValueError if a subclass of Row does not set ``__table__``.
        """
        table_cls = getattr(cls, '__table__', None)
        if table_cls is not None:
            columns = getattr(table_cls, '__columns__')
            if cls.__init__ == bases[-1].__init__:  # check if __init__ is overridden
                # copy so the table's own column specs are not shared with this docval
                func_args = list(deepcopy(columns))
                func_args.append({'name': 'table', 'type': Table, 'default': None,
                                  'help': 'the table this row is from'})
                func_args.append({'name': 'idx', 'type': int, 'default': None,
                                  'help': 'the index for this row'})

                @docval(*func_args)
                def __init__(self, **kwargs):
                    super(cls, self).__init__()
                    table, idx = popargs('table', 'idx', kwargs)
                    self.__keys = list()
                    self.__idx = None
                    self.__table = None
                    # the remaining keyword arguments are the column values
                    for k, v in kwargs.items():
                        self.__keys.append(k)
                        setattr(self, k, v)
                    # idx must be set before table: the table setter adds the row
                    # to the table only when idx is still None
                    self.idx = idx
                    self.table = table

                setattr(cls, '__init__', __init__)

                def todict(self):
                    return {k: getattr(self, k) for k in self.__keys}

                setattr(cls, 'todict', todict)

            # set this so Table.row gets set when a Table is instantiated
            table_cls.__rowclass__ = cls
        else:
            if bases != (object,):
                raise ValueError('__table__ must be set if sub-classing Row')

    def __eq__(self, other):
        """Rows are equal when they have the same index in the very same table object.

        Defining ``__eq__`` without ``__hash__`` makes Row instances unhashable.
        """
        if not isinstance(other, Row):
            # let Python fall back to its default handling for foreign types
            return NotImplemented
        return self.idx == other.idx and self.table is other.table

    def __str__(self):
        return "Row(%i, %s) = %s" % (self.idx, self.table.name, str(self.todict()))
class RowGetter:
    """
    A simple class for providing __getitem__ functionality that returns
    Row objects to a Table.

    Row objects are cached per index, so asking for the same index twice
    returns the same object.
    """

    def __init__(self, table):
        self.table = table
        self.cache = dict()

    def __getitem__(self, idx):
        cached = self.cache.get(idx)
        if cached is not None:
            return cached
        # cache miss: read the raw values and wrap them in the table's Row class
        values = self.table[idx]
        row_obj = self.table.__rowclass__(*values, table=self.table, idx=idx)
        self.cache[idx] = row_obj
        return row_obj
class Table(Data):
    r'''
    Subclasses should specify the class attribute \_\_columns\_\_.

    This should be a list of dictionaries with the following keys:

    - ``name`` the column name
    - ``type`` the type of data in this column
    - ``doc`` a brief description of what gets stored in this column

    For reference, this list of dictionaries will be used with docval to autogenerate
    the ``add_row`` method for adding data to this table.

    If \_\_columns\_\_ is not specified, no custom ``add_row`` method will be added.

    The class attribute \_\_defaultname\_\_ can also be set to specify a default name
    for the table class. If \_\_defaultname\_\_ is not specified, then ``name`` will
    need to be specified when the class is instantiated.

    A Table class can be paired with a Row class for conveniently working with rows of
    a Table. This pairing must be indicated in the Row class implementation. See Row
    for more details.
    '''

    # This class attribute is used to indicate which Row class should be used when
    # adding RowGetter functionality to the Table.
    __rowclass__ = None

    @ExtenderMeta.pre_init
    def __build_table_class(cls, name, bases, classdict):
        """Generate ``__colidx__``, ``__init__`` and ``add_row`` for classes defining ``__columns__``."""
        if hasattr(cls, '__columns__'):
            columns = getattr(cls, '__columns__')

            # map each column name to its position within a row
            setattr(cls, '__colidx__', {col['name']: i for i, col in enumerate(columns)})

            if cls.__init__ == bases[-1].__init__:  # check if __init__ is overridden
                name_arg = {'name': 'name', 'type': str, 'doc': 'the name of this table'}
                defname = getattr(cls, '__defaultname__', None)
                if defname is not None:
                    name_arg['default'] = defname  # override the name with the default name if present

                @docval(name_arg,
                        {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'the data in this table',
                         'default': list()})
                def __init__(self, **kwargs):
                    name, data = getargs('name', 'data', kwargs)
                    colnames = [i['name'] for i in columns]
                    super(cls, self).__init__(colnames, name, data)

                setattr(cls, '__init__', __init__)

            if cls.add_row == bases[-1].add_row:  # check if add_row is overridden

                @docval(*columns)
                def add_row(self, **kwargs):
                    return super(cls, self).add_row(kwargs)

                setattr(cls, 'add_row', add_row)

    @docval({'name': 'columns', 'type': (list, tuple), 'doc': 'a list of the columns in this table'},
            {'name': 'name', 'type': str, 'doc': 'the name of this container'},
            {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'the source of the data', 'default': list()})
    def __init__(self, **kwargs):
        self.__columns = tuple(popargs('columns', kwargs))
        self.__col_index = {name: idx for idx, name in enumerate(self.__columns)}
        if getattr(self, '__rowclass__') is not None:
            self.row = RowGetter(self)
        super().__init__(**kwargs)

    @property
    def columns(self):
        """The column names of this table, as a tuple."""
        return self.__columns

    @docval({'name': 'values', 'type': dict, 'doc': 'the values for each column'})
    def add_row(self, **kwargs):
        """Append a row and return its index.

        Row objects among the values are stored by their ``idx``.
        Raises ValueError if the underlying data is not a list.
        """
        values = getargs('values', kwargs)
        if not isinstance(self.data, list):
            msg = 'Cannot append row to %s' % type(self.data)
            raise ValueError(msg)
        ret = len(self.data)
        row = [values[col] for col in self.columns]
        row = [v.idx if isinstance(v, Row) else v for v in row]
        self.data.append(tuple(row))
        return ret

    def which(self, **kwargs):
        '''
        Query a table

        Takes exactly one ``column=value`` keyword and returns the list of row
        indices whose value in that column equals ``value``.

        Raises ValueError unless exactly one keyword is given, and KeyError if
        the column does not exist.
        '''
        if len(kwargs) != 1:
            raise ValueError("only one column can be queried")
        colname, value = kwargs.popitem()
        # Prefer the class-level index generated from __columns__; fall back to the
        # per-instance index so tables created directly with ``columns`` work too.
        col_index = getattr(self, '__colidx__', None)
        if col_index is None:
            col_index = self.__col_index
        idx = col_index.get(colname)
        if idx is None:
            msg = "no '%s' column in %s" % (colname, self.__class__.__name__)
            raise KeyError(msg)
        return [i for i, row in enumerate(self.data) if row[idx] == value]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, args):
        """Select a row, a column, or a single cell.

        - ``table[idx]`` returns row ``idx`` of the data
        - ``table['col']`` returns the list of values in column ``col``
        - ``table['col', idx]`` or ``table[colnum, idx]`` returns a single cell

        Raises KeyError for an unknown column name or an unsupported column key.
        """
        if isinstance(args, tuple):
            col_key, idx = args[0], args[1]
            if isinstance(col_key, str):
                col = self.__col_index.get(col_key)
                if col is None:
                    # same error as the plain-string branch, instead of indexing with None
                    raise KeyError(col_key)
            elif isinstance(col_key, int):
                col = col_key
            else:
                raise KeyError('first argument must be a column name or index')
            return self.data[idx][col]
        if isinstance(args, str):
            col = self.__col_index.get(args)
            if col is None:
                raise KeyError(args)
            return [row[col] for row in self.data]
        return self.data[args]

    def to_dataframe(self):
        '''Produce a pandas DataFrame containing this table's data.
        '''
        data = {colname: self[colname] for colname in self.columns}
        return pd.DataFrame(data)

    @classmethod
    @docval(
        {'name': 'df', 'type': pd.DataFrame, 'doc': 'input data'},
        {'name': 'name', 'type': str, 'doc': 'the name of this container', 'default': None},
        {
            'name': 'extra_ok',
            'type': bool,
            'doc': 'accept (and ignore) unexpected columns on the input dataframe',
            'default': False
        },
    )
    def from_dataframe(cls, **kwargs):
        '''Construct an instance of Table (or a subclass) from a pandas DataFrame. The columns of the dataframe
        should match the columns defined on the Table subclass.

        A single missing column is taken from the dataframe index when the index carries that
        column's name. Raises ValueError for any other missing column, and for unexpected
        columns unless ``extra_ok`` is True.
        '''
        df, name, extra_ok = getargs('df', 'name', 'extra_ok', kwargs)

        cls_cols = [col['name'] for col in getattr(cls, '__columns__')]
        df_cols = list(df.columns)

        missing_columns = set(cls_cols) - set(df_cols)
        extra_columns = set(df_cols) - set(cls_cols)

        # honor extra_ok: it was accepted but never consulted before
        if extra_columns and not extra_ok:
            raise ValueError(
                'unrecognized column(s) {} for table class {} (columns {})'.format(
                    extra_columns, cls.__name__, cls_cols
                )
            )

        use_index = False
        if len(missing_columns) == 1 and next(iter(missing_columns)) == df.index.name:
            use_index = True
        elif missing_columns:
            raise ValueError(
                'missing column(s) {} for table class {} (columns {}, provided {})'.format(
                    missing_columns, cls.__name__, cls_cols, df_cols
                )
            )

        # Rows are always tuples, including when one value comes from the index.
        # Only cls_cols are read, so any extra columns are ignored.
        index_col = df.index.name if use_index else None
        data = []
        for index, row in df.iterrows():
            if use_index:
                data.append(tuple(index if colname == index_col else row[colname] for colname in cls_cols))
            else:
                data.append(tuple(row[colname] for colname in cls_cols))

        if name is None:
            return cls(data=data)
        return cls(name=name, data=data)