# Coverage for src/hdmf/container.py: 94% (843 statements)
# Report generated by coverage.py v7.3.2, created at 2023-10-04 02:57 +0000
1import types
2from abc import abstractmethod
3from collections import OrderedDict
4from copy import deepcopy
5from uuid import uuid4
6from warnings import warn
8import h5py
9import numpy as np
10import pandas as pd
12from .data_utils import DataIO, append_data, extend_data
13from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict
def _set_exp(cls):
    """Flag ``cls`` as experimental by setting its ``_experimental`` attribute to True."""
    setattr(cls, '_experimental', True)
def _exp_warn_msg(cls):
    """Build the warning text emitted when an experimental feature is used.

    If ``cls`` is a class, its ``__name__`` is used as the prefix of the message;
    any other value is interpolated into the message as-is.
    """
    label = cls.__name__ if isinstance(cls, type) else cls
    return ('%s is experimental -- it may be removed in the future and '
            'is not guaranteed to maintain backward compatibility') % label
class HERDManager:
    """
    Mixin that lets a subclass hold a reference to an instance of HERD.

    The reference is stored on the instance as ``_herd`` once ``link_resources`` has been called.
    """

    @docval({'name': 'herd', 'type': 'HERD',
             'doc': 'The external resources to be used for the container.'},)
    def link_resources(self, **kwargs):
        """
        Attach an instance of HERD to this object in order to auto-add terms/references to data.
        """
        herd = kwargs['herd']
        self._herd = herd

    def get_linked_resources(self):
        """Return the HERD instance attached via ``link_resources``, or None if none was attached."""
        return getattr(self, "_herd", None)
class AbstractContainer(metaclass=ExtenderMeta):
    """Base class for containers whose properties are generated from a class-level fields spec.

    At class-declaration time, ``__gather_fields`` (run via ``ExtenderMeta.pre_init``) reads the
    class attribute named by ``_fieldsname`` and creates a property for every field that is not
    already an attribute of the class. Field values are stored per instance in ``self.fields``.
    """

    _experimental = False

    # The name of the class attribute that subclasses use to autogenerate properties
    # This parameterization is supplied in case users would like to configure
    # the class attribute name to something domain-specific
    _fieldsname = '__fields__'

    _data_type_attr = 'data_type'

    # Subclasses use this class attribute to add properties to autogenerate
    # Autogenerated properties will store values in self.__field_values
    __fields__ = tuple()

    # This field is automatically set by __gather_fields before initialization.
    # It holds all the values in __fields__ for this class and its parent classes.
    __fieldsconf = tuple()

    _pconf_allowed_keys = {'name', 'doc', 'settable'}

    # Override the _setter factor function, so directives that apply to
    # Container do not get used on Data
    @classmethod
    def _setter(cls, field):
        """
        Make a setter function for creating a :py:func:`property`

        :param field: the field configuration dict; must contain 'name' and may contain 'settable'
        :returns: the setter function, or None if the field is configured with ``settable=False``
        """
        name = field['name']

        if not field.get('settable', True):
            return None

        def setter(self, val):
            # setting None is a no-op, so an unset field stays unset
            if val is None:
                return
            # a field can only be assigned once
            if name in self.fields:
                msg = "can't set attribute '%s' -- already set" % name
                raise AttributeError(msg)
            self.fields[name] = val

        return setter

    @classmethod
    def _getter(cls, field):
        """
        Make a getter function for creating a :py:func:`property`

        The getter returns ``self.fields.get(name)``, i.e. None when the field has not been set.
        """
        doc = field.get('doc')
        name = field['name']

        def getter(self):
            return self.fields.get(name)

        setattr(getter, '__doc__', doc)
        return getter

    @staticmethod
    def _check_field_spec(field):
        """
        A helper function for __gather_fields to make sure we are always working
        with a dict specification and that the specification contains the correct keys

        :raises ValueError: if ``field`` is a dict without a 'name' key
        """
        tmp = field
        if isinstance(tmp, dict):
            if 'name' not in tmp:
                raise ValueError("must specify 'name' if using dict in __fields__")
        else:
            # a bare (non-dict) spec is treated as the field name
            tmp = {'name': tmp}
        return tmp

    @classmethod
    def _check_field_spec_keys(cls, field_conf):
        """Raise ValueError if ``field_conf`` contains a key that is not in ``cls._pconf_allowed_keys``."""
        for k in field_conf:
            if k not in cls._pconf_allowed_keys:
                msg = ("Unrecognized key '%s' in %s config '%s' on %s"
                       % (k, cls._fieldsname, field_conf['name'], cls.__name__))
                raise ValueError(msg)

    @classmethod
    def _get_fields(cls):
        """Return the value of the class attribute named by ``cls._fieldsname``."""
        return getattr(cls, cls._fieldsname)

    @classmethod
    def _set_fields(cls, value):
        """Set the class attribute named by ``cls._fieldsname`` to ``value``."""
        return setattr(cls, cls._fieldsname, value)

    @classmethod
    def get_fields_conf(cls):
        """Return the tuple of field configuration dicts gathered by ``__gather_fields``."""
        return cls.__fieldsconf

    @ExtenderMeta.pre_init
    def __gather_fields(cls, name, bases, classdict):
        '''
        This classmethod will be called during class declaration in the metaclass to automatically
        create setters and getters for fields that need to be exported

        :raises TypeError: if the fields class attribute is not a tuple
        '''
        fields = cls._get_fields()
        if not isinstance(fields, tuple):
            msg = "'%s' must be of type tuple" % cls._fieldsname
            raise TypeError(msg)

        # check field specs and create map from field name to field conf dictionary
        fields_dict = OrderedDict()
        for f in fields:
            pconf = cls._check_field_spec(f)
            cls._check_field_spec_keys(pconf)
            fields_dict[pconf['name']] = pconf
        all_fields_conf = list(fields_dict.values())

        # check whether this class overrides __fields__
        if len(bases):
            # find highest base class that is an AbstractContainer (parent is higher than children)
            # NOTE: if no base is an AbstractContainer, base_cls is left as the last base iterated
            base_cls = None
            for base_cls in reversed(bases):
                if issubclass(base_cls, AbstractContainer):
                    break

            base_fields = base_cls._get_fields()  # tuple of field names from base class
            if base_fields is not fields:
                # names of fields that are redefined in this class and already exist in the base class
                fields_to_remove_from_base = [fname for fname in fields_dict if fname in base_fields]
                # prepend field specs from base class to fields list of this class
                # but only field specs that are not redefined in this class
                base_fields_conf = base_cls.get_fields_conf()  # tuple of fields configurations from base class
                all_fields_conf[0:0] = [pconf for pconf in base_fields_conf
                                        if pconf['name'] not in fields_to_remove_from_base]

        # create getter and setter if attribute does not already exist
        # if 'doc' not specified in __fields__, use doc from docval of __init__
        docs = {dv['name']: dv['doc'] for dv in get_docval(cls.__init__)}
        for field_conf in all_fields_conf:
            pname = field_conf['name']
            field_conf.setdefault('doc', docs.get(pname))
            if not hasattr(cls, pname):
                setattr(cls, pname, property(cls._getter(field_conf), cls._setter(field_conf)))

        cls._set_fields(tuple(field_conf['name'] for field_conf in all_fields_conf))
        cls.__fieldsconf = tuple(all_fields_conf)

    def __del__(self):
        # Make sure the reference counter for our read IO is being decremented
        try:
            del self.__read_io
            self.__read_io = None
        except AttributeError:
            # __read_io was never set (e.g. __init__ did not run), so there is nothing to release
            pass

    def __new__(cls, *args, **kwargs):
        """
        Static method of the object class called by Python to create the object first and then
        __init__() is called to initialize the object's attributes.

        NOTE: this method is called directly from ObjectMapper.__new_container__ during the process of
        constructing the object from builders that are read from a file.

        Recognized keyword arguments (all optional): 'container_source', 'object_id',
        'in_construct_mode' and 'parent'. Emits a warning if the class is flagged as experimental.
        """
        inst = super().__new__(cls)
        if cls._experimental:
            warn(_exp_warn_msg(cls))
        inst.__container_source = kwargs.pop('container_source', None)
        inst.__parent = None
        inst.__children = list()
        inst.__modified = True
        inst.__object_id = kwargs.pop('object_id', str(uuid4()))
        # this variable is being passed in from ObjectMapper.__new_container__ and is
        # reset to False in that method after the object has been initialized by __init__
        inst._in_construct_mode = kwargs.pop('in_construct_mode', False)
        # goes through the parent setter, which also registers inst as a child of the parent
        inst.parent = kwargs.pop('parent', None)
        return inst

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'})
    def __init__(self, **kwargs):
        name = getargs('name', kwargs)
        if '/' in name:
            raise ValueError("name '" + name + "' cannot contain '/'")
        self.__name = name
        self.__field_values = dict()
        self.__read_io = None
        self.__obj = None

    @property
    def read_io(self):
        """
        The :class:`~hdmf.backends.io.HDMFIO` object used for reading the container.

        This property will typically be None if this Container is not a root Container
        (i.e., if `parent` is not None). Use `get_read_io` instead if you want to retrieve the
        :class:`~hdmf.backends.io.HDMFIO` object used for reading from the parent container.
        """
        return self.__read_io

    @read_io.setter
    def read_io(self, value):
        """
        Set the io object used to read this container

        :param value: The :class:`~hdmf.backends.io.HDMFIO` object to use
        :raises ValueError: If io has already been set. We can't change the IO for a container.
        :raises TypeError: If value is not an instance of :class:`~hdmf.backends.io.HDMFIO`
        """
        # We do not want to import HDMFIO on the module level to avoid circular imports. Since we only need
        # it for type checking we import it here.
        from hdmf.backends.io import HDMFIO
        if not isinstance(value, HDMFIO):
            raise TypeError("io must be an instance of HDMFIO")
        # re-assigning the same io object is allowed; assigning a different one is not
        if self.__read_io is not None and self.__read_io is not value:
            raise ValueError("io has already been set for this container (name=%s, type=%s)" %
                             (self.name, str(type(self))))
        else:
            self.__read_io = value

    def get_read_io(self):
        """
        Get the io object used to read this container.

        If `self.read_io` is None, this function will iterate through the parents and return the
        first `io` object found on a parent container

        :returns: The :class:`~hdmf.backends.io.HDMFIO` object used to read this container.
                  Returns None in case no io object is found, e.g., in case this container has
                  not been read from file.
        """
        curr_obj = self
        re_io = self.read_io
        while re_io is None and curr_obj.parent is not None:
            curr_obj = curr_obj.parent
            re_io = curr_obj.read_io
        return re_io

    @property
    def name(self):
        '''
        The name of this Container
        '''
        return self.__name

    @docval({'name': 'data_type', 'type': str, 'doc': 'the data_type to search for', 'default': None})
    def get_ancestor(self, **kwargs):
        """
        Traverse parent hierarchy and return first instance of the specified data_type

        If no data_type is given, the direct parent is returned. Returns None if no ancestor matches.
        """
        data_type = getargs('data_type', kwargs)
        if data_type is None:
            return self.parent
        p = self.parent
        while p is not None:
            if getattr(p, p._data_type_attr) == data_type:
                return p
            p = p.parent
        return None

    def all_children(self):
        """Get a list of all child objects and their child objects recursively.

        If the object has an object_id, the object will be added to "ret" to be returned.
        If that object has children, they will be added to the "stack" in order to be:
        1) Checked to see if has an object_id, if so then add to "ret"
        2) Have children that will also be checked

        As a side effect, this rebuilds the object-ID index returned by ``all_objects``.
        """
        stack = [self]  # list of containers, including self, to add and later parse for children
        ret = list()
        self.__obj = LabelledDict(label='all_objects', key_attr='object_id')
        while len(stack):  # search until there's nothing in the list
            n = stack.pop()
            ret.append(n)
            if n.object_id is not None:
                self.__obj[n.object_id] = n
            else:  # pragma: no cover
                # warn that a child does not have an object_id, which is unusual
                # (report the object's class name; type(n).__class__ would be its metaclass)
                warn('%s "%s" does not have an object_id' % (type(n).__name__, n.name))
            if hasattr(n, 'children'):
                for c in n.children:
                    stack.append(c)
        return ret

    @property
    def all_objects(self):
        """Get a LabelledDict that indexed all child objects and their children by object ID."""
        # built lazily on first access; all_children() populates self.__obj
        if self.__obj is None:
            self.all_children()
        return self.__obj

    @docval()
    def get_ancestors(self, **kwargs):
        """Return a tuple of all ancestors of this container, starting with the direct parent."""
        p = self.parent
        ret = []
        while p is not None:
            ret.append(p)
            p = p.parent
        return tuple(ret)

    @property
    def fields(self):
        '''
        The dict that stores the values of this container's fields, keyed by field name.

        The set of fields is declared on the class via the `__fields__` class attribute, which subclasses
        use to add properties to autogenerate. `__fields__` allows for field names and for dicts with the
        keys {'name', 'child', 'required_name', 'doc', 'settable'} (the keys accepted by a given class are
        listed in its `_pconf_allowed_keys`).
        1. name: The name of the field property
        2. child: A boolean value to set the parent/child relationship between the field property and the container.
        3. required_name: The name the field property must have such that `name` matches `required_name`.
        4. doc: Documentation of the field property
        5. settable: If true, a setter function is created so that the field can be changed after creation.
        '''
        return self.__field_values

    @property
    def object_id(self):
        """The object ID of this container; a new UUID string is generated if it is currently None."""
        if self.__object_id is None:
            self.__object_id = str(uuid4())
        return self.__object_id

    @docval({'name': 'recurse', 'type': bool,
             'doc': "whether or not to change the object ID of this container's children", 'default': True})
    def generate_new_id(self, **kwargs):
        """Changes the object ID of this Container and all of its children to a new UUID string."""
        recurse = getargs('recurse', kwargs)
        self.__object_id = str(uuid4())
        self.set_modified()
        if recurse:
            for c in self.children:
                c.generate_new_id(**kwargs)

    @property
    def modified(self):
        """Whether this container is flagged as modified."""
        return self.__modified

    @docval({'name': 'modified', 'type': bool,
             'doc': 'whether or not this Container has been modified', 'default': True})
    def set_modified(self, **kwargs):
        """Set the modified flag; setting it to True also flags a parent that is a Container."""
        modified = getargs('modified', kwargs)
        self.__modified = modified
        if modified and isinstance(self.parent, Container):
            self.parent.set_modified()

    @property
    def children(self):
        """A tuple (snapshot) of the child containers of this container."""
        return tuple(self.__children)

    @docval({'name': 'child', 'type': 'Container',
             'doc': 'the child Container for this Container', 'default': None})
    def add_child(self, **kwargs):
        """Deprecated. Set the ``parent`` attribute of the child instead."""
        warn(DeprecationWarning('add_child is deprecated. Set the parent attribute instead.'))
        child = getargs('child', kwargs)
        if child is not None:
            # if child.parent is a Container, then the mismatch between child.parent and parent
            # is used to make a soft/external link from the parent to a child elsewhere
            # if child.parent is not a Container, it is either None or a Proxy and should be set to self
            if not isinstance(child.parent, AbstractContainer):
                # actually add the child to the parent in parent setter
                child.parent = self
        else:
            warn('Cannot add None as child to a container %s' % self.name)

    @classmethod
    def type_hierarchy(cls):
        """Return the method resolution order (``__mro__``) of this class."""
        return cls.__mro__

    @property
    def container_source(self):
        '''
        The source of this Container
        '''
        return self.__container_source

    @container_source.setter
    def container_source(self, source):
        # the source can only be assigned once
        if self.__container_source is not None:
            raise Exception('cannot reassign container_source')
        self.__container_source = source

    @property
    def parent(self):
        '''
        The parent Container of this Container
        '''
        # do it this way because __parent may not exist yet (not set in constructor)
        return getattr(self, '_AbstractContainer__parent', None)

    @parent.setter
    def parent(self, parent_container):
        if self.parent is parent_container:
            return

        if self.parent is not None:
            if isinstance(self.parent, AbstractContainer):
                raise ValueError(('Cannot reassign parent to Container: %s. '
                                  'Parent is already: %s.' % (repr(self), repr(self.parent))))
            else:
                if parent_container is None:
                    raise ValueError("Got None for parent of '%s' - cannot overwrite Proxy with NoneType" % repr(self))
                # NOTE this assumes isinstance(parent_container, Proxy) but we get a circular import
                # if we try to do that
                if self.parent.matches(parent_container):
                    self.__parent = parent_container
                    parent_container.__children.append(self)
                    parent_container.set_modified()
                else:
                    self.__parent.add_candidate(parent_container)
        else:
            self.__parent = parent_container
            if isinstance(parent_container, Container):
                parent_container.__children.append(self)
                parent_container.set_modified()
        for child in self.children:
            # used by hdmf.common.table.DynamicTableRegion to check for orphaned tables
            child._validate_on_set_parent()

    def _remove_child(self, child):
        """Remove a child Container. Intended for use in subclasses that allow dynamic addition of child Containers.

        :raises ValueError: if ``child`` is not an AbstractContainer or is not a child of this container
        """
        if not isinstance(child, AbstractContainer):
            raise ValueError('Cannot remove non-AbstractContainer object from children.')
        if child not in self.children:
            raise ValueError("%s '%s' is not a child of %s '%s'." % (child.__class__.__name__, child.name,
                                                                     self.__class__.__name__, self.name))
        child.__parent = None
        self.__children.remove(child)
        child.set_modified()
        self.set_modified()

    def reset_parent(self):
        """Reset the parent of this Container to None and remove the Container from the children of its parent.

        Use with caution. This can result in orphaned containers and broken links.

        :raises ValueError: if the current parent is not None and not an AbstractContainer
        """
        if self.parent is None:
            return
        elif isinstance(self.parent, AbstractContainer):
            self.parent._remove_child(self)
        else:
            raise ValueError("Cannot reset parent when parent is not an AbstractContainer: %s" % repr(self.parent))

    def _validate_on_set_parent(self):
        """Validate this Container after setting the parent.

        This method is called by the parent setter. It can be overridden in subclasses to perform additional
        validation. The default implementation does nothing.
        """
        pass
class Container(AbstractContainer):
    """A container that can contain other containers and has special functionality for printing."""

    _pconf_allowed_keys = {'name', 'child', 'required_name', 'doc', 'settable'}

    @classmethod
    def _setter(cls, field):
        """Return the setter function for the given field to be added to the class during class declaration.

        Setters are chained: the base setter from AbstractContainer comes first, optionally wrapped by a
        setter that enforces 'required_name', optionally wrapped by a setter that sets the parent of the
        value(s) when 'child' is true. The outermost (last) setter is returned.
        """
        super_setter = AbstractContainer._setter(field)
        ret = [super_setter]
        # create setter with check for required name
        # the AbstractContainer that is passed to the setter must have name = required_name
        if field.get('required_name', None) is not None:
            required_name = field['required_name']
            idx1 = len(ret) - 1

            def container_setter(self, val):
                if val is not None:
                    if not isinstance(val, AbstractContainer):
                        msg = ("Field '%s' on %s has a required name and must be a subclass of AbstractContainer."
                               % (field['name'], self.__class__.__name__))
                        raise ValueError(msg)
                    if val.name != required_name:
                        msg = ("Field '%s' on %s must be named '%s'."
                               % (field['name'], self.__class__.__name__, required_name))
                        raise ValueError(msg)
                ret[idx1](self, val)  # call the previous setter

            ret.append(container_setter)

        # create setter that accepts a value or tuple, list, or dict or values and sets the value's parent to self
        if field.get('child', False):
            idx2 = len(ret) - 1

            def container_setter(self, val):
                ret[idx2](self, val)  # call the previous setter
                if val is not None:
                    # normalize val to an iterable of the containers whose parent should be set
                    if isinstance(val, (tuple, list)):
                        pass
                    elif isinstance(val, dict):
                        val = val.values()
                    else:
                        val = [val]
                    for v in val:
                        if not isinstance(v.parent, Container):
                            v.parent = self
                        else:
                            # the ObjectMapper will create a link from self (parent) to v (child with existing parent)
                            # still need to mark self as modified
                            self.set_modified()

            ret.append(container_setter)
        return ret[-1]  # return the last setter (which should call the previous setters, if applicable)

    def __repr__(self):
        cls = self.__class__
        # NOTE(review): "0x%d" prints id(self) in decimal after a hex prefix; left unchanged because
        # callers/tests may match this exact string -- confirm before switching to "%x"
        template = "%s %s.%s at 0x%d" % (self.name, cls.__module__, cls.__name__, id(self))
        if len(self.fields):
            template += "\nFields:\n"
        for k in sorted(self.fields):  # sorted to enable tests
            v = self.fields[k]
            if hasattr(v, '__len__'):
                # sized values are only printed when they are non-empty
                if isinstance(v, (np.ndarray, list, tuple)):
                    if len(v) > 0:
                        template += " {}: {}\n".format(k, self.__smart_str(v, 1))
                elif v:
                    template += " {}: {}\n".format(k, self.__smart_str(v, 1))
            else:
                template += " {}: {}\n".format(k, v)
        return template

    def _repr_html_(self):
        """Return an HTML representation of this container: a header followed by its (nested) fields."""
        from html import escape  # local import so the module-level import block is not affected

        CSS_STYLE = """
        <style>
            .container-fields {
                font-family: "Open Sans", Arial, sans-serif;
            }
            .container-fields .field-value {
                color: #00788E;
            }
            .container-fields details > summary {
                cursor: pointer;
                display: list-item;
            }
            .container-fields details > summary:hover {
                color: #0A6EAA;
            }
        </style>
        """

        JS_SCRIPT = """
        <script>
            function copyToClipboard(text) {
                navigator.clipboard.writeText(text).then(function() {
                    console.log('Copied to clipboard: ' + text);
                }, function(err) {
                    console.error('Could not copy text: ', err);
                });
            }

            document.addEventListener('DOMContentLoaded', function() {
                let fieldKeys = document.querySelectorAll('.container-fields .field-key');
                fieldKeys.forEach(function(fieldKey) {
                    fieldKey.addEventListener('click', function() {
                        let accessCode = fieldKey.getAttribute('title').replace('Access code: ', '');
                        copyToClipboard(accessCode);
                    });
                });
            });
        </script>
        """
        if self.name == self.__class__.__name__:
            header_text = self.name
        else:
            header_text = f"{self.name} ({self.__class__.__name__})"
        # escape the header so that a name containing '<', '>' or '&' cannot break or inject markup
        header_text = escape(header_text, quote=False)
        html_repr = CSS_STYLE
        html_repr += JS_SCRIPT
        html_repr += "<div class='container-wrap'>"
        html_repr += (
            f"<div class='container-header'><div class='xr-obj-type'><h3>{header_text}</h3></div></div>"
        )
        html_repr += self._generate_html_repr(self.fields)
        html_repr += "</div>"
        return html_repr

    def _generate_html_repr(self, fields, level=0, access_code=".fields"):
        """Recursively render ``fields`` (a dict, list or numpy array) as HTML.

        :param fields: the object to render; other types produce an empty string
        :param level: nesting depth, used to compute the left margin (20px per level)
        :param access_code: the Python expression suffix that accesses ``fields``, shown as the element title
        :returns: the HTML string

        Keys, values and access codes are HTML-escaped so that text such as ``<class 'x'>`` is displayed
        literally instead of being parsed as markup. Text without '&', '<', '>' or '"' is emitted unchanged.
        """
        from html import escape  # local import so the module-level import block is not affected

        def as_text(obj):
            # escape for use as element content
            return escape(str(obj), quote=False)

        def as_attr(obj):
            # escape for use inside a double-quoted attribute; single quotes are safe there and are
            # kept as-is so that access codes like .fields['x'] stay readable in the output
            return escape(str(obj), quote=False).replace('"', '&quot;')

        html_repr = ""

        if isinstance(fields, dict):
            for key, value in fields.items():
                current_access_code = f"{access_code}['{key}']"
                if (
                    isinstance(value, (list, dict, np.ndarray))
                    or hasattr(value, "fields")
                ):
                    # nested value: render as a collapsible section
                    label = key
                    if isinstance(value, dict):
                        label = f"{label} ({len(value)})"

                    html_repr += (
                        f'<details><summary style="display: list-item; margin-left: {level * 20}px;" '
                        f'class="container-fields field-key" title="{as_attr(current_access_code)}">'
                        f'<b>{as_text(label)}</b></summary>'
                    )
                    if hasattr(value, "fields"):
                        value = value.fields
                        current_access_code = current_access_code + ".fields"
                    html_repr += self._generate_html_repr(
                        value, level + 1, current_access_code
                    )
                    html_repr += "</details>"
                else:
                    html_repr += (
                        f'<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-key"'
                        f' title="{as_attr(current_access_code)}">{as_text(key)}:</span> '
                        f'<span class="field-value">{as_text(value)}</span></div>'
                    )
        elif isinstance(fields, list):
            for index, item in enumerate(fields):
                current_access_code = f"{access_code}[{index}]"
                html_repr += (
                    f'<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-value"'
                    f' title="{as_attr(current_access_code)}">{as_text(item)}</span></div>'
                )
        elif isinstance(fields, np.ndarray):
            # "<br>" is the valid line-break tag ("</br>" is a parse error that browsers recover from)
            str_ = as_text(fields).replace("\n", "<br>")
            html_repr += (
                f'<div style="margin-left: {level * 20}px;" class="container-fields">{str_}</div>'
            )
        else:
            pass

        return html_repr

    @staticmethod
    def __smart_str(v, num_indent):
        """
        Print compact string representation of data.

        If v is a list, try to print it using numpy. This will condense the string
        representation of datasets with many elements. If that doesn't work, just print the list.

        If v is a dictionary, print the name and type of each element

        If v is a set, print it sorted

        If v is a neurodata_type, print the name of type

        Otherwise, use the built-in str()

        Parameters
        ----------
        v
            the value to format
        num_indent
            the current indentation level (two spaces per level)

        Returns
        -------
        str

        """

        if isinstance(v, (list, tuple)):
            if len(v) and isinstance(v[0], AbstractContainer):
                return Container.__smart_str_list(v, num_indent, '(')
            try:
                return str(np.asarray(v))
            except ValueError:
                return Container.__smart_str_list(v, num_indent, '(')
        elif isinstance(v, dict):
            return Container.__smart_str_dict(v, num_indent)
        elif isinstance(v, set):
            return Container.__smart_str_list(sorted(v), num_indent, '{')
        elif isinstance(v, AbstractContainer):
            return "{} {}".format(getattr(v, 'name'), type(v))
        else:
            return str(v)

    @staticmethod
    def __smart_str_list(str_list, num_indent, left_br):
        """Format a sequence one element per line between ``left_br`` ('(' or '{') and its closing bracket."""
        # look up the matching closing bracket (previously an unsupported left_br left right_br unbound)
        right_br = {'(': ')', '{': '}'}[left_br]
        if len(str_list) == 0:
            return left_br + ' ' + right_br
        indent = num_indent * 2 * ' '
        indent_in = (num_indent + 1) * 2 * ' '
        items = [Container.__smart_str(v, num_indent + 1) for v in str_list]
        # elements are separated by ',' + newline; the last element has no trailing comma
        return left_br + '\n' + indent_in + (',\n' + indent_in).join(items) + '\n' + indent + right_br

    @staticmethod
    def __smart_str_dict(d, num_indent):
        """Format a dict as its sorted keys, each followed by the type of its value, one per line."""
        left_br = '{'
        right_br = '}'
        if len(d) == 0:
            return left_br + ' ' + right_br
        indent = num_indent * 2 * ' '
        indent_in = (num_indent + 1) * 2 * ' '
        entries = [Container.__smart_str(k, num_indent + 1) + ' ' + str(type(d[k])) for k in sorted(d)]
        return left_br + '\n' + indent_in + (',\n' + indent_in).join(entries) + '\n' + indent + right_br

    def set_data_io(self, dataset_name, data_io_class, **kwargs):
        """Wrap the value of a field in an instance of ``data_io_class``.

        :param dataset_name: the name of the field in ``self.fields`` to wrap
        :param data_io_class: callable invoked as ``data_io_class(data=<current value>, **kwargs)``
        :param kwargs: additional keyword arguments passed to ``data_io_class``
        :raises ValueError: if the field is not set (i.e. its value is None)
        """
        data = self.fields.get(dataset_name)
        if data is None:
            raise ValueError(f"{dataset_name} is None and cannot be wrapped in a DataIO class")
        self.fields[dataset_name] = data_io_class(data=data, **kwargs)
class Data(AbstractContainer):
    """
    A class for representing dataset containers
    """

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'},
            {'name': 'data', 'type': ('scalar_data', 'array_data', 'data'), 'doc': 'the source of the data'})
    def __init__(self, **kwargs):
        # 'data' is handled here; the remaining arguments go to AbstractContainer.__init__
        source = popargs('data', kwargs)
        super().__init__(**kwargs)
        self.__data = source

    @property
    def data(self):
        """The data held by this container."""
        return self.__data

    @property
    def shape(self):
        """
        Get the shape of the data represented by this container
        :return: Shape tuple
        :rtype: tuple of ints
        """
        return get_data_shape(self.__data)

    @docval({'name': 'dataio', 'type': DataIO, 'doc': 'the DataIO to apply to the data held by this Data'})
    def set_dataio(self, **kwargs):
        """
        Apply DataIO object to the data held by this Data object
        """
        wrapper = getargs('dataio', kwargs)
        # the DataIO takes over the current data and then replaces it as this object's data
        wrapper.data = self.__data
        self.__data = wrapper

    @docval({'name': 'func', 'type': types.FunctionType, 'doc': 'a function to transform *data*'})
    def transform(self, **kwargs):
        """
        Transform data from the current underlying state.

        This function can be used to permanently load data from disk, or convert to a different
        representation, such as a torch.Tensor

        :returns: this Data object, to allow chaining
        """
        self.__data = getargs('func', kwargs)(self.__data)
        return self

    def __bool__(self):
        value = self.data
        if value is None:
            return False
        # arrays and sequences are truthy when non-empty (avoids ambiguous truth value of arrays)
        if isinstance(value, (np.ndarray, tuple, list)):
            return len(value) != 0
        return bool(value)

    def __len__(self):
        return len(self.__data)

    def __getitem__(self, args):
        return self.get(args)

    def get(self, args):
        """Return the element(s) of the data selected by ``args``."""
        data = self.data
        # tuples/lists do not support fancy indexing, so select element by element
        if isinstance(data, (tuple, list)) and isinstance(args, (tuple, list, np.ndarray)):
            return [data[i] for i in args]
        if isinstance(data, h5py.Dataset) and isinstance(args, np.ndarray):
            # This is needed for h5py 2.9 compatibility
            args = args.tolist()
        return data[args]

    def append(self, arg):
        """Add ``arg`` to the end of the data of this Data container (via ``append_data``)."""
        self.__data = append_data(self.__data, arg)

    def extend(self, arg):
        """
        The extend_data method adds all the elements of the iterable arg to the
        end of the data of this Data container.

        :param arg: The iterable to add to the end of this VectorData
        """
        self.__data = extend_data(self.__data, arg)
class DataRegion(Data):
    """A Data whose ``data`` and ``region`` properties are declared abstract."""

    @property
    @abstractmethod
    def data(self):
        """
        The target data that this region applies to
        """

    @property
    @abstractmethod
    def region(self):
        """
        The region that indexes into data e.g. slice or list of indices
        """
849class MultiContainerInterface(Container):
850 """Class that dynamically defines methods to support a Container holding multiple Containers of the same type.
852 To use, extend this class and create a dictionary as a class attribute with any of the following keys:
853 * 'attr' to name the attribute that stores the Container instances
854 * 'type' to provide the Container object type (type or list/tuple of types, type can be a docval macro)
855 * 'add' to name the method for adding Container instances
856 * 'get' to name the method for getting Container instances
857 * 'create' to name the method for creating Container instances (only if a single type is specified)
859 If the attribute does not exist in the class, it will be generated. If it does exist, it should behave like a dict.
861 The keys 'attr', 'type', and 'add' are required.
862 """
def __new__(cls, *args, **kwargs):
    """Create the instance, refusing direct instantiation and subclasses that lack ``__clsconf__``."""
    # the interface class itself is never instantiated directly
    if cls is MultiContainerInterface:
        raise TypeError("Can't instantiate class MultiContainerInterface.")
    has_conf = hasattr(cls, '__clsconf__')
    if not has_conf:
        raise TypeError("MultiContainerInterface subclass %s is missing __clsconf__ attribute. Please check that "
                        "the class is properly defined." % cls.__name__)
    return super().__new__(cls, *args, **kwargs)
@staticmethod
def __add_article(noun):
    """Prefix ``noun`` with the indefinite article 'a' or 'an'.

    :param noun: a string, a class (its ``__name__`` is used), or a list/tuple of those
                 (only the first element is used)
    :returns: 'an <noun>' if the noun starts with a vowel letter, otherwise 'a <noun>'
    """
    # container types may be given as a list or a tuple; use the first one in either case
    if isinstance(noun, (list, tuple)):
        noun = noun[0]
    if isinstance(noun, type):
        noun = noun.__name__
    # str.startswith with a tuple is also safe for an empty string
    if noun.startswith(tuple('aeiouAEIOU')):
        return 'an %s' % noun
    return 'a %s' % noun
@staticmethod
def __join(argtype):
    """Return a grammatical string representation of a list or tuple of classes or text.

    Examples:
        cls.__join(Container) returns "Container"
        cls.__join((Container, )) returns "Container"
        cls.__join((Container, Data)) returns "Container or Data"
        cls.__join((Container, Data, Subcontainer)) returns "Container, Data, or Subcontainer"
    """

    def tostr(x):
        # classes are shown by name; anything else is used as-is
        if isinstance(x, type):
            return x.__name__
        return x

    if not isinstance(argtype, (list, tuple)):
        return tostr(argtype)

    names = [tostr(item) for item in argtype]
    count = len(names)
    if count == 1:
        return names[0]
    if count == 2:
        return " or ".join(tostr(n) for n in names)
    # three or more: comma-separated with ', or ' before the last name
    leading = ", ".join(tostr(n) for n in names[:-1])
    return leading + ', or ' + names[-1]
@classmethod
def __make_get(cls, func_name, attr_name, container_type):
    """Build the docval-wrapped "get" method for the dict stored under ``attr_name``.

    The generated method looks an item up by name. When no name is given it returns the only
    item, and raises ValueError if the dict is empty or holds more than one item. A name that
    is not present (or maps to None) raises KeyError.
    """
    doc = "Get %s from this %s" % (cls.__add_article(container_type), cls.__name__)
    name_doc = 'the name of the %s' % cls.__join(container_type)
    returns_doc = 'the %s with the given name' % cls.__join(container_type)

    @docval({'name': 'name', 'type': str, 'doc': name_doc, 'default': None},
            rtype=container_type, returns=returns_doc, func_name=func_name, doc=doc)
    def _func(self, **kwargs):
        name = getargs('name', kwargs)
        items = getattr(self, attr_name)

        if name is not None:
            found = items.get(name)
            if found is None:
                msg = "'%s' not found in %s of %s '%s'." % (name, attr_name, cls.__name__, self.name)
                raise KeyError(msg)
            return found

        # no name given: only unambiguous when exactly one item is stored
        count = len(items)
        if count > 1:
            msg = ("More than one element in %s of %s '%s' -- must specify a name."
                   % (attr_name, cls.__name__, self.name))
            raise ValueError(msg)
        if count == 0:
            msg = "%s of %s '%s' is empty." % (attr_name, cls.__name__, self.name)
            raise ValueError(msg)
        return next(iter(items.values()))

    return _func
@classmethod
def __make_getitem(cls, attr_name, container_type):
    """Build the docval-wrapped ``__getitem__`` for the dict stored under ``attr_name``.

    Same lookup rules as the generated "get" method, but the error messages refer to the
    container itself rather than to the attribute.
    """
    doc = "Get %s from this %s" % (cls.__add_article(container_type), cls.__name__)
    name_doc = 'the name of the %s' % cls.__join(container_type)
    returns_doc = 'the %s with the given name' % cls.__join(container_type)

    @docval({'name': 'name', 'type': str, 'doc': name_doc, 'default': None},
            rtype=container_type, returns=returns_doc, func_name='__getitem__', doc=doc)
    def _func(self, **kwargs):
        name = getargs('name', kwargs)
        items = getattr(self, attr_name)

        if name is not None:
            found = items.get(name)
            if found is None:
                msg = "'%s' not found in %s '%s'." % (name, cls.__name__, self.name)
                raise KeyError(msg)
            return found

        # no name given: only unambiguous when exactly one item is stored
        count = len(items)
        if count > 1:
            msg = ("More than one %s in %s '%s' -- must specify a name."
                   % (cls.__join(container_type), cls.__name__, self.name))
            raise ValueError(msg)
        if count == 0:
            msg = "%s '%s' is empty." % (cls.__name__, self.name)
            raise ValueError(msg)
        return next(iter(items.values()))

    return _func
@classmethod
def __make_add(cls, func_name, attr_name, container_type):
    """Build the docval-wrapped "add" method for the dict stored under ``attr_name``.

    The generated method accepts a single object of ``container_type``, or a list, tuple or
    dict of them, stores each under its ``name`` and returns the argument it was given.

    :raises ValueError: (from the generated method) if an object's name is already present.
                        The name is checked before the object or this container is modified,
                        so a rejected object is left untouched. Objects processed earlier in
                        the same call remain added.
    """
    doc = "Add one or multiple %s objects to this %s" % (cls.__join(container_type), cls.__name__)

    @docval({'name': attr_name, 'type': (list, tuple, dict, container_type),
             'doc': 'one or multiple %s objects to add to this %s' % (cls.__join(container_type), cls.__name__)},
            func_name=func_name, doc=doc)
    def _func(self, **kwargs):
        container = getargs(attr_name, kwargs)
        if isinstance(container, container_type):
            containers = [container]
        elif isinstance(container, dict):
            containers = container.values()
        else:
            containers = container
        d = getattr(self, attr_name)
        for tmp in containers:
            # validate first so that a duplicate does not get its parent set (or mark this
            # container as modified) before the error is raised
            if tmp.name in d:
                msg = "'%s' already exists in %s '%s'" % (tmp.name, cls.__name__, self.name)
                raise ValueError(msg)
            if not isinstance(tmp.parent, Container):
                tmp.parent = self
            else:
                # the ObjectMapper will create a link from self (parent) to tmp (child with existing parent)
                # still need to mark self as modified
                self.set_modified()
            d[tmp.name] = tmp
        return container

    return _func
@classmethod
def __make_create(cls, func_name, add_name, container_type):
    """Build the docval-wrapped "create" method.

    The generated method takes the same arguments as ``container_type.__init__``, constructs
    the object, passes it to the method named ``add_name`` and returns the new object.
    """
    doc = "Create %s object and add it to this %s" % (cls.__add_article(container_type), cls.__name__)
    init_args = get_docval(container_type.__init__)
    returns_doc = "the %s object that was created" % cls.__join(container_type)

    @docval(*init_args, func_name=func_name, doc=doc, returns=returns_doc, rtype=container_type)
    def _func(self, **kwargs):
        created = container_type(**kwargs)
        adder = getattr(self, add_name)
        adder(created)
        return created

    return _func
@classmethod
def __make_constructor(cls, clsconf):
    """Build a docval-wrapped ``__init__`` from a list of conf dicts.

    The generated constructor takes one optional argument per conf entry (named after its
    'attr' key) plus ``name``, which defaults to the class name. Each supplied value is
    handed to the matching 'add' method.
    """
    args = [
        {'name': conf['attr'], 'type': (list, tuple, dict, conf['type']),
         'doc': '%s to store in this interface' % cls.__join(conf['type']), 'default': dict()}
        for conf in clsconf
    ]
    args.append({'name': 'name', 'type': str, 'doc': 'the name of this container', 'default': cls.__name__})

    @docval(*args, func_name='__init__')
    def _func(self, **kwargs):
        super().__init__(name=kwargs['name'])
        for conf in clsconf:
            attr_name, add_name = conf['attr'], conf['add']
            items = popargs(attr_name, kwargs)
            getattr(self, add_name)(items)

    return _func
@classmethod
def __make_getter(cls, attr):
    """Make a getter function for creating a :py:func:`property`"""

    def _func(self):
        if attr in self.fields:
            return self.fields.get(attr)

        # The field has not been initialized yet, so create it as an empty labelled dict.
        # This happens lazily here to avoid creating a default __init__, which may or may
        # not be overridden in custom classes and dynamically generated classes.
        def _detach(child):
            if child.parent is self:
                self._remove_child(child)

        self.fields[attr] = LabelledDict(attr, remove_callable=_detach)
        return self.fields.get(attr)

    return _func
@classmethod
def __make_setter(cls, add_name):
    """Make a setter function for creating a :py:func:`property`"""

    @docval({'name': 'val', 'type': (list, tuple, dict), 'doc': 'the sub items to add', 'default': None})
    def _func(self, **kwargs):
        val = getargs('val', kwargs)
        # assigning None is a no-op; anything else is delegated to the add method
        if val is not None:
            getattr(self, add_name)(val)

    return _func
@ExtenderMeta.pre_init
def __build_class(cls, name, bases, classdict):
    """Verify __clsconf__ and create methods based on __clsconf__.
    This method is called prior to __new__ and __init__ during class declaration in the metaclass.

    :raises TypeError: if ``__clsconf__`` is neither a dict nor a list
    """
    if not hasattr(cls, '__clsconf__'):
        return

    raw_conf = cls.__clsconf__
    if isinstance(raw_conf, dict):
        multi, clsconf = False, [raw_conf]
    elif isinstance(raw_conf, list):
        multi, clsconf = True, raw_conf
    else:
        raise TypeError("'__clsconf__' for MultiContainerInterface subclass %s must be a dict or a list of "
                        "dicts." % cls.__name__)

    for conf_index, conf_dict in enumerate(clsconf):
        cls.__build_conf_methods(conf_dict, conf_index, multi)

    # square-bracket access is only unambiguous when exactly one conf type is defined
    if len(clsconf) == 1:
        only_conf = clsconf[0]
        cls.__getitem__ = cls.__make_getitem(only_conf.get('attr'), only_conf.get('type'))

    # generate a constructor unless this class defines its own __init__
    if '__init__' not in cls.__dict__:
        cls.__init__ = cls.__make_constructor(clsconf)
@classmethod
def __build_conf_methods(cls, conf_dict, conf_index, multi):
    """Validate one conf dict and attach the property and methods it describes to ``cls``.

    :param conf_dict: a single ``__clsconf__`` entry
    :param conf_index: position of the entry, appended to error messages when ``multi`` is True
    :param multi: whether ``__clsconf__`` was given as a list
    :raises ValueError: if 'add', 'attr' or 'type' is missing, or if 'create' is given while
                        'type' is not a single type
    """

    def _fail(msg):
        if multi:
            msg += " at index %d" % conf_index
        raise ValueError(msg)

    # required keys, checked in this order; each comes with its own error message
    required = (
        ('add', "MultiContainerInterface subclass %s is missing 'add' key in __clsconf__"),
        ('attr', "MultiContainerInterface subclass %s is missing 'attr' key in __clsconf__"),
        ('type', "MultiContainerInterface subclass %s is missing 'type' key in __clsconf__"),
    )
    found = []
    for key, template in required:
        value = conf_dict.get(key)
        if value is None:
            _fail(template % cls.__name__)
        found.append(value)
    add, attr, container_type = found

    # create property with the name given in 'attr' only if the attribute is not already defined
    if not hasattr(cls, attr):
        prop_get = cls.__make_getter(attr)
        prop_set = cls.__make_setter(add)
        doc = "a dictionary containing the %s in this %s" % (cls.__join(container_type), cls.__name__)
        setattr(cls, attr, property(prop_get, prop_set, None, doc))

    # the add method is always created
    setattr(cls, add, cls.__make_add(add, attr, container_type))

    # the create method is optional and only possible for a single container type
    create = conf_dict.get('create')
    if create is not None:
        if not isinstance(container_type, type):
            _fail(("Cannot specify 'create' key in __clsconf__ for MultiContainerInterface subclass %s "
                   "when 'type' key is not a single type") % cls.__name__)
        setattr(cls, create, cls.__make_create(create, add, container_type))

    # the get method is optional
    get = conf_dict.get('get')
    if get is not None:
        setattr(cls, get, cls.__make_get(get, attr, container_type))
class Row(object, metaclass=ExtenderMeta):
    """
    A class for representing rows from a Table.

    The Table class can be indicated with the __table__. Doing so
    will set constructor arguments for the Row class and ensure that
    Row.idx is set appropriately when a Row is added to the Table. It will
    also add functionality to the Table class for getting Row objects.

    Note, the Row class is not needed for working with Table objects. This
    is merely convenience functionality for working with Tables.
    """

    __table__ = None

    @property
    def idx(self):
        """The index of this row in its respective Table"""
        return self.__idx

    @idx.setter
    def idx(self, val):
        # the index can only be assigned while it is still unset
        if self.__idx is None:
            self.__idx = val
        else:
            raise ValueError("cannot reset the ID of a row object")

    @property
    def table(self):
        """The Table this Row comes from"""
        return self.__table

    @table.setter
    def table(self, val):
        if val is not None:
            self.__table = val
        # A row that belongs to a table but has no index yet is appended to that table and
        # takes the returned index. Without a table there is nothing to append to, so the
        # row stays detached instead of failing on None.
        if self.__table is not None and self.idx is None:
            self.idx = self.__table.add_row(**self.todict())

    @ExtenderMeta.pre_init
    def __build_row_class(cls, name, bases, classdict):
        """Generate ``__init__`` and ``todict`` for a Row subclass from its table's columns.

        Runs during class declaration. Subclasses must set ``__table__``; the table class is
        then linked back to this Row class through ``__rowclass__``.
        """
        table_cls = getattr(cls, '__table__', None)
        if table_cls is not None:
            columns = getattr(table_cls, '__columns__')
            if cls.__init__ == bases[-1].__init__:  # check if __init__ is overridden
                # copy the column specs so the table's own __columns__ entries are not shared
                func_args = list(deepcopy(columns))
                func_args.append({'name': 'table', 'type': Table, 'default': None,
                                  'help': 'the table this row is from'})
                func_args.append({'name': 'idx', 'type': int, 'default': None,
                                  'help': 'the index for this row'})

                @docval(*func_args)
                def __init__(self, **kwargs):
                    super(cls, self).__init__()
                    table, idx = popargs('table', 'idx', kwargs)
                    self.__keys = list()
                    self.__idx = None
                    self.__table = None
                    # whatever is left in kwargs are the column values
                    for k, v in kwargs.items():
                        self.__keys.append(k)
                        setattr(self, k, v)
                    self.idx = idx
                    self.table = table

                setattr(cls, '__init__', __init__)

                def todict(self):
                    return {k: getattr(self, k) for k in self.__keys}

                setattr(cls, 'todict', todict)

            # set this so Table.row gets set when a Table is instantiated
            table_cls.__rowclass__ = cls
        else:
            if bases != (object,):
                raise ValueError('__table__ must be set if sub-classing Row')

    def __eq__(self, other):
        # let Python fall back to its default handling for operands that are not rows
        if not isinstance(other, Row):
            return NotImplemented
        return self.idx == other.idx and self.table is other.table

    def __str__(self):
        return "Row(%i, %s) = %s" % (self.idx, self.table.name, str(self.todict()))
class RowGetter:
    """
    Helper that gives a Table square-bracket access returning Row objects.

    Row objects are built on first access and then reused from a per-instance cache.
    """

    def __init__(self, table):
        self.cache = dict()
        self.table = table

    def __getitem__(self, idx):
        cached = self.cache.get(idx)
        if cached is not None:
            return cached
        # not cached yet: wrap the raw row values in the table's Row class
        values = self.table[idx]
        built = self.table.__rowclass__(*values, table=self.table, idx=idx)
        self.cache[idx] = built
        return built
class Table(Data):
    r'''
    Subclasses should specify the class attribute \_\_columns\_\_.

    This should be a list of dictionaries with the following keys:

    - ``name`` the column name
    - ``type`` the type of data in this column
    - ``doc`` a brief description of what gets stored in this column

    For reference, this list of dictionaries will be used with docval to autogenerate
    the ``add_row`` method for adding data to this table.

    If \_\_columns\_\_ is not specified, no custom ``add_row`` method will be added.

    The class attribute __defaultname__ can also be set to specify a default name
    for the table class. If \_\_defaultname\_\_ is not specified, then ``name`` will
    need to be specified when the class is instantiated.

    A Table class can be paired with a Row class for conveniently working with rows of
    a Table. This pairing must be indicated in the Row class implementation. See Row
    for more details.
    '''

    # This class attribute is used to indicate which Row class should be used when
    # adding RowGetter functionality to the Table.
    __rowclass__ = None

    @ExtenderMeta.pre_init
    def __build_table_class(cls, name, bases, classdict):
        """Generate ``__colidx__``, ``__init__`` and ``add_row`` from ``__columns__``.

        Runs during class declaration; does nothing when the class has no ``__columns__``.
        """
        if hasattr(cls, '__columns__'):
            columns = getattr(cls, '__columns__')

            # class-level map from column name to its position in a row
            idx = {col['name']: i for i, col in enumerate(columns)}
            setattr(cls, '__colidx__', idx)

            if cls.__init__ == bases[-1].__init__:  # check if __init__ is overridden
                name = {'name': 'name', 'type': str, 'doc': 'the name of this table'}
                defname = getattr(cls, '__defaultname__', None)
                if defname is not None:
                    name['default'] = defname  # override the name with the default name if present

                @docval(name,
                        {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'the data in this table',
                         'default': list()})
                def __init__(self, **kwargs):
                    name, data = getargs('name', 'data', kwargs)
                    colnames = [i['name'] for i in columns]
                    super(cls, self).__init__(colnames, name, data)

                setattr(cls, '__init__', __init__)

            if cls.add_row == bases[-1].add_row:  # check if add_row is overridden

                @docval(*columns)
                def add_row(self, **kwargs):
                    return super(cls, self).add_row(kwargs)

                setattr(cls, 'add_row', add_row)

    @docval({'name': 'columns', 'type': (list, tuple), 'doc': 'a list of the columns in this table'},
            {'name': 'name', 'type': str, 'doc': 'the name of this container'},
            {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'the source of the data', 'default': list()})
    def __init__(self, **kwargs):
        self.__columns = tuple(popargs('columns', kwargs))
        # per-instance map from column name to its position in a row
        self.__col_index = {name: idx for idx, name in enumerate(self.__columns)}
        if getattr(self, '__rowclass__') is not None:
            self.row = RowGetter(self)
        super().__init__(**kwargs)

    @property
    def columns(self):
        """The column names of this table, as a tuple"""
        return self.__columns

    @docval({'name': 'values', 'type': dict, 'doc': 'the values for each column'})
    def add_row(self, **kwargs):
        """Append a row and return its index.

        Row objects among the values are stored as their ``idx``. Raises ValueError if the
        underlying data is not a list.
        """
        values = getargs('values', kwargs)
        if not isinstance(self.data, list):
            msg = 'Cannot append row to %s' % type(self.data)
            raise ValueError(msg)
        ret = len(self.data)
        row = [values[col] for col in self.columns]
        row = [v.idx if isinstance(v, Row) else v for v in row]
        self.data.append(tuple(row))
        return ret

    def which(self, **kwargs):
        '''
        Query a table

        Takes exactly one ``column=value`` keyword and returns the list of indices of the
        rows whose value in that column equals ``value``. Raises ValueError if the number
        of keywords is not one and KeyError if the column is unknown.
        '''
        if len(kwargs) != 1:
            raise ValueError("only one column can be queried")
        colname, value = kwargs.popitem()
        # use the per-instance column index: it always exists, whereas the class-level
        # __colidx__ is only created for subclasses that declare __columns__
        idx = self.__col_index.get(colname)
        if idx is None:
            msg = "no '%s' column in %s" % (colname, self.__class__.__name__)
            raise KeyError(msg)
        return [i for i, row in enumerate(self.data) if row[idx] == value]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, args):
        """Return a cell, a whole column or a whole row.

        - ``table[col, idx]``: one cell; ``col`` is a column name or an integer position
        - ``table[colname]``: list with that column's value from every row
        - ``table[idx]``: whatever the underlying data returns for ``idx``

        Raises KeyError for an unknown column name, or if ``col`` is neither str nor int.
        """
        idx = args
        col = None
        if isinstance(args, tuple):
            idx = args[1]
            if isinstance(args[0], str):
                col = self.__col_index.get(args[0])
                # fail like the column-only branch does, not by indexing a row with None
                if col is None:
                    raise KeyError(args[0])
            elif isinstance(args[0], int):
                col = args[0]
            else:
                raise KeyError('first argument must be a column name or index')
            return self.data[idx][col]
        elif isinstance(args, str):
            col = self.__col_index.get(args)
            if col is None:
                raise KeyError(args)
            return [row[col] for row in self.data]
        else:
            return self.data[idx]

    def to_dataframe(self):
        '''Produce a pandas DataFrame containing this table's data.
        '''

        data = {colname: self[colname] for colname in self.columns}
        return pd.DataFrame(data)

    @classmethod
    @docval(
        {'name': 'df', 'type': pd.DataFrame, 'doc': 'input data'},
        {'name': 'name', 'type': str, 'doc': 'the name of this container', 'default': None},
        {
            'name': 'extra_ok',
            'type': bool,
            'doc': 'accept (and ignore) unexpected columns on the input dataframe',
            'default': False
        },
    )
    def from_dataframe(cls, **kwargs):
        '''Construct an instance of Table (or a subclass) from a pandas DataFrame. The columns of the dataframe
        should match the columns defined on the Table subclass.

        Unexpected dataframe columns raise ValueError unless ``extra_ok`` is True, in which
        case they are ignored. A single missing column is taken from the dataframe index if
        the index carries that column's name; any other missing column raises ValueError.
        '''

        df, name, extra_ok = getargs('df', 'name', 'extra_ok', kwargs)

        cls_cols = [col['name'] for col in getattr(cls, '__columns__')]
        df_cols = list(df.columns)

        missing_columns = set(cls_cols) - set(df_cols)
        extra_columns = set(df_cols) - set(cls_cols)

        # extra columns are only an error when the caller did not opt in to ignoring them
        if extra_columns and not extra_ok:
            raise ValueError(
                'unrecognized column(s) {} for table class {} (columns {})'.format(
                    extra_columns, cls.__name__, cls_cols
                )
            )

        use_index = False
        if len(missing_columns) == 1 and next(iter(missing_columns)) == df.index.name:
            use_index = True

        elif missing_columns:
            raise ValueError(
                'missing column(s) {} for table class {} (columns {}, provided {})'.format(
                    missing_columns, cls.__name__, cls_cols, df_cols
                )
            )

        # rows are always built from cls_cols, so any extra dataframe columns are dropped;
        # every row is stored as a tuple, matching what add_row appends
        data = []
        for index, row in df.iterrows():
            if use_index:
                data.append(tuple(
                    row[colname] if colname != df.index.name else index
                    for colname in cls_cols
                ))
            else:
                data.append(tuple(row[colname] for colname in cls_cols))

        if name is None:
            return cls(data=data)
        return cls(name=name, data=data)