Coverage for src/hdmf/container.py: 94%
870 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-08-18 20:49 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-08-18 20:49 +0000
1import types
2from abc import abstractmethod
3from collections import OrderedDict
4from copy import deepcopy
5from uuid import uuid4
6from warnings import warn
8import h5py
9import numpy as np
10import pandas as pd
12from .data_utils import DataIO, append_data, extend_data
13from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict
14from hdmf.term_set import TermSet
def _set_exp(cls):
    """Flag ``cls`` as experimental by setting its ``_experimental`` attribute to True."""
    setattr(cls, '_experimental', True)
def _exp_warn_msg(cls):
    """Build the warning text for an experimental feature.

    If ``cls`` is a class, its ``__name__`` is used as the subject of the message;
    any other value is interpolated into the message as-is.
    """
    subject = cls.__name__ if isinstance(cls, type) else cls
    template = ('%s is experimental -- it may be removed in the future and '
                'is not guaranteed to maintain backward compatibility')
    return template % subject
class HERDManager:
    """
    Mixin that lets a subclass hold an optional reference to an instance of HERD.
    """

    @docval({'name': 'herd', 'type': 'HERD',
             'doc': 'The external resources to be used for the container.'},)
    def link_resources(self, **kwargs):
        """
        Attach an instance of HERD (stored on ``self._herd``) in order to auto-add terms/references to data.
        """
        herd = kwargs['herd']
        self._herd = herd

    def get_linked_resources(self):
        """Return the HERD instance attached via ``link_resources``, or None if none has been attached."""
        return getattr(self, "_herd", None)
class AbstractContainer(metaclass=ExtenderMeta):
    """Base class for named objects that carry an object ID and live in a parent/child hierarchy.

    Properties for the names listed in the class attribute named by ``_fieldsname``
    (``__fields__`` by default) are generated at class-creation time by ``__gather_fields``.
    Their values are stored per instance in the dict exposed by the ``fields`` property.
    """

    # If True, a warning is issued every time the class is instantiated (see __new__)
    _experimental = False

    # The name of the class attribute that subclasses use to autogenerate properties
    # This parameterization is supplied in case users would like to configure
    # the class attribute name to something domain-specific
    _fieldsname = '__fields__'

    # Name of the attribute that get_ancestor compares against the requested data_type
    _data_type_attr = 'data_type'

    # Subclasses use this class attribute to add properties to autogenerate
    # Autogenerated properties will store values in self.__field_values
    __fields__ = tuple()

    # This field is automatically set by __gather_fields before initialization.
    # It holds all the values in __fields__ for this class and its parent classes.
    __fieldsconf = tuple()

    # Keys that a dict-style field specification is allowed to contain
    _pconf_allowed_keys = {'name', 'doc', 'settable'}

    # Override the _setter factor function, so directives that apply to
    # Container do not get used on Data
    @classmethod
    def _setter(cls, field):
        """
        Make a setter function for creating a :py:func:`property`

        Returns None if the field configuration has ``settable`` set to False. The generated
        setter ignores None and raises AttributeError if the field already has a value.
        """
        name = field['name']

        if not field.get('settable', True):
            return None

        def setter(self, val):
            if val is None:
                return
            if name in self.fields:
                msg = "can't set attribute '%s' -- already set" % name
                raise AttributeError(msg)
            self.fields[name] = val

        return setter

    @classmethod
    def _getter(cls, field):
        """
        Make a getter function for creating a :py:func:`property`

        The generated getter returns the stored value, or None if the field has not been set.
        """
        doc = field.get('doc')
        name = field['name']

        def getter(self):
            return self.fields.get(name)

        setattr(getter, '__doc__', doc)
        return getter

    @staticmethod
    def _check_field_spec(field):
        """
        A helper function for __gather_fields to make sure we are always working
        with a dict specification and that the specification contains the correct keys

        :raises ValueError: If ``field`` is a dict without a 'name' key.
        """
        tmp = field
        if isinstance(tmp, dict):
            if 'name' not in tmp:
                raise ValueError("must specify 'name' if using dict in __fields__")
        else:
            tmp = {'name': tmp}
        return tmp

    @classmethod
    def _check_field_spec_keys(cls, field_conf):
        """Raise ValueError if ``field_conf`` contains a key that is not in ``_pconf_allowed_keys``."""
        for k in field_conf:
            if k not in cls._pconf_allowed_keys:
                msg = ("Unrecognized key '%s' in %s config '%s' on %s"
                       % (k, cls._fieldsname, field_conf['name'], cls.__name__))
                raise ValueError(msg)

    @classmethod
    def _get_fields(cls):
        """Return the value of the class attribute named by ``_fieldsname``."""
        return getattr(cls, cls._fieldsname)

    @classmethod
    def _set_fields(cls, value):
        """Set the class attribute named by ``_fieldsname`` to ``value``."""
        return setattr(cls, cls._fieldsname, value)

    @classmethod
    def get_fields_conf(cls):
        """Return the tuple of field configuration dicts gathered for this class."""
        return cls.__fieldsconf

    @ExtenderMeta.pre_init
    def __gather_fields(cls, name, bases, classdict):
        '''
        This classmethod will be called during class declaration in the metaclass to automatically
        create setters and getters for fields that need to be exported
        '''
        fields = cls._get_fields()
        if not isinstance(fields, tuple):
            msg = "'%s' must be of type tuple" % cls._fieldsname
            raise TypeError(msg)

        # check field specs and create map from field name to field conf dictionary
        fields_dict = OrderedDict()
        for f in fields:
            pconf = cls._check_field_spec(f)
            cls._check_field_spec_keys(pconf)
            fields_dict[pconf['name']] = pconf
        all_fields_conf = list(fields_dict.values())

        # check whether this class overrides __fields__
        if len(bases):
            # find highest base class that is an AbstractContainer (parent is higher than children)
            base_cls = None
            for base_cls in reversed(bases):
                if issubclass(base_cls, AbstractContainer):
                    break

            base_fields = base_cls._get_fields()  # tuple of field names from base class
            if base_fields is not fields:
                # check whether new fields spec already exists in base class
                fields_to_remove_from_base = list()
                for field_name in fields_dict:
                    if field_name in base_fields:
                        fields_to_remove_from_base.append(field_name)
                # prepend field specs from base class to fields list of this class
                # but only field specs that are not redefined in this class
                base_fields_conf = base_cls.get_fields_conf()  # tuple of fields configurations from base class
                base_fields_conf_to_add = list()
                for pconf in base_fields_conf:
                    if pconf['name'] not in fields_to_remove_from_base:
                        base_fields_conf_to_add.append(pconf)
                all_fields_conf[0:0] = base_fields_conf_to_add

        # create getter and setter if attribute does not already exist
        # if 'doc' not specified in __fields__, use doc from docval of __init__
        docs = {dv['name']: dv['doc'] for dv in get_docval(cls.__init__)}
        for field_conf in all_fields_conf:
            pname = field_conf['name']
            field_conf.setdefault('doc', docs.get(pname))
            if not hasattr(cls, pname):
                setattr(cls, pname, property(cls._getter(field_conf), cls._setter(field_conf)))

        cls._set_fields(tuple(field_conf['name'] for field_conf in all_fields_conf))
        cls.__fieldsconf = tuple(all_fields_conf)

    def __del__(self):
        # Make sure the reference counter for our read IO is being decremented
        try:
            del self.__read_io
            self.__read_io = None
        except AttributeError:
            # __read_io was never set (e.g. __init__ did not run)
            pass

    def __new__(cls, *args, **kwargs):
        """
        Static method of the object class called by Python to create the object first and then
        __init__() is called to initialize the object's attributes.

        NOTE: this method is called directly from ObjectMapper.__new_container__ during the process of
        constructing the object from builders that are read from a file.
        """
        inst = super().__new__(cls)
        if cls._experimental:
            warn(_exp_warn_msg(cls))
        inst.__container_source = kwargs.pop('container_source', None)
        inst.__parent = None
        inst.__children = list()
        inst.__modified = True
        inst.__object_id = kwargs.pop('object_id', str(uuid4()))
        # this variable is being passed in from ObjectMapper.__new_container__ and is
        # reset to False in that method after the object has been initialized by __init__
        inst._in_construct_mode = kwargs.pop('in_construct_mode', False)
        inst.parent = kwargs.pop('parent', None)
        return inst

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'})
    def __init__(self, **kwargs):
        name = getargs('name', kwargs)
        if '/' in name:
            raise ValueError("name '" + name + "' cannot contain '/'")
        self.__name = name
        self.__field_values = dict()
        self.__read_io = None
        self.__obj = None

    @property
    def read_io(self):
        """
        The :class:`~hdmf.backends.io.HDMFIO` object used for reading the container.

        This property will typically be None if this Container is not a root Container
        (i.e., if `parent` is not None). Use `get_read_io` instead if you want to retrieve the
        :class:`~hdmf.backends.io.HDMFIO` object used for reading from the parent container.
        """
        return self.__read_io

    @read_io.setter
    def read_io(self, value):
        """
        Set the io object used to read this container

        :param value: The :class:`~hdmf.backends.io.HDMFIO` object to use
        :raises ValueError: If io has already been set. We can't change the IO for a container.
        :raises TypeError: If value is not an instance of :class:`~hdmf.backends.io.HDMFIO`
        """
        # We do not want to import HDMFIO on the module level to avoid circular imports. Since we only need
        # it for type checking we import it here.
        from hdmf.backends.io import HDMFIO
        if not isinstance(value, HDMFIO):
            raise TypeError("io must be an instance of HDMFIO")
        if self.__read_io is not None and self.__read_io is not value:
            raise ValueError("io has already been set for this container (name=%s, type=%s)" %
                             (self.name, str(type(self))))
        else:
            self.__read_io = value

    def get_read_io(self):
        """
        Get the io object used to read this container.

        If `self.read_io` is None, this function will iterate through the parents and return the
        first `io` object found on a parent container

        :returns: The :class:`~hdmf.backends.io.HDMFIO` object used to read this container.
                  Returns None in case no io object is found, e.g., in case this container has
                  not been read from file.
        """
        curr_obj = self
        re_io = self.read_io
        while re_io is None and curr_obj.parent is not None:
            curr_obj = curr_obj.parent
            re_io = curr_obj.read_io
        return re_io

    @property
    def name(self):
        '''
        The name of this Container
        '''
        return self.__name

    @docval({'name': 'data_type', 'type': str, 'doc': 'the data_type to search for', 'default': None})
    def get_ancestor(self, **kwargs):
        """
        Traverse parent hierarchy and return first instance of the specified data_type

        If ``data_type`` is None, the direct parent is returned. Returns None if no ancestor matches.
        """
        data_type = getargs('data_type', kwargs)
        if data_type is None:
            return self.parent
        p = self.parent
        while p is not None:
            if getattr(p, p._data_type_attr) == data_type:
                return p
            p = p.parent
        return None

    def all_children(self):
        """Get a list of all child objects and their child objects recursively.

        If the object has an object_id, the object will be added to "ret" to be returned.
        If that object has children, they will be added to the "stack" in order to be:
        1) Checked to see if has an object_id, if so then add to "ret"
        2) Have children that will also be checked

        As a side effect, the index returned by ``all_objects`` is rebuilt on every call.
        """
        stack = [self]  # list of containers, including self, to add and later parse for children
        ret = list()
        self.__obj = LabelledDict(label='all_objects', key_attr='object_id')
        while len(stack):  # search until there's nothing in the list
            n = stack.pop()
            ret.append(n)
            if n.object_id is not None:
                self.__obj[n.object_id] = n
            else:  # pragma: no cover
                # warn that a child does not have an object_id, which is unusual
                # (report the class name of the child, not its metaclass)
                warn('%s "%s" does not have an object_id' % (type(n).__name__, n.name))
            if hasattr(n, 'children'):
                for c in n.children:
                    stack.append(c)
        return ret

    @property
    def all_objects(self):
        """Get a LabelledDict that indexed all child objects and their children by object ID."""
        # NOTE(review): the index is only built here when it does not exist yet; it is not
        # refreshed automatically if children change afterwards -- call all_children() to rebuild.
        if self.__obj is None:
            self.all_children()
        return self.__obj

    @docval()
    def get_ancestors(self, **kwargs):
        """Return a tuple of all ancestors of this object, starting with the direct parent."""
        p = self.parent
        ret = []
        while p is not None:
            ret.append(p)
            p = p.parent
        return tuple(ret)

    @property
    def fields(self):
        '''
        The dict that stores the values of the autogenerated field properties of this instance.

        The getters and setters created by ``_getter`` and ``_setter`` read from and write to this
        dict, keyed by field name. Which properties exist is configured on the class through the
        attribute named by ``_fieldsname`` (``__fields__`` by default), whose entries are either
        plain names or dicts restricted to the keys in ``_pconf_allowed_keys``.
        '''
        return self.__field_values

    @property
    def object_id(self):
        """The object ID of this object. A new UUID string is generated if the ID is None."""
        if self.__object_id is None:
            self.__object_id = str(uuid4())
        return self.__object_id

    @docval({'name': 'recurse', 'type': bool,
             'doc': "whether or not to change the object ID of this container's children", 'default': True})
    def generate_new_id(self, **kwargs):
        """Changes the object ID of this Container and all of its children to a new UUID string."""
        recurse = getargs('recurse', kwargs)
        self.__object_id = str(uuid4())
        self.set_modified()
        if recurse:
            for c in self.children:
                c.generate_new_id(**kwargs)

    @property
    def modified(self):
        """Whether this object is flagged as modified."""
        return self.__modified

    @docval({'name': 'modified', 'type': bool,
             'doc': 'whether or not this Container has been modified', 'default': True})
    def set_modified(self, **kwargs):
        """Set the modified flag. Setting it to True also flags the parent if the parent is a Container."""
        modified = getargs('modified', kwargs)
        self.__modified = modified
        if modified and isinstance(self.parent, Container):
            self.parent.set_modified()

    @property
    def children(self):
        """A tuple (snapshot) of the direct children of this object."""
        return tuple(self.__children)

    @docval({'name': 'child', 'type': 'Container',
             'doc': 'the child Container for this Container', 'default': None})
    def add_child(self, **kwargs):
        """Deprecated. Set the ``parent`` attribute of the child instead."""
        warn(DeprecationWarning('add_child is deprecated. Set the parent attribute instead.'))
        child = getargs('child', kwargs)
        if child is not None:
            # if child.parent is a Container, then the mismatch between child.parent and parent
            # is used to make a soft/external link from the parent to a child elsewhere
            # if child.parent is not a Container, it is either None or a Proxy and should be set to self
            if not isinstance(child.parent, AbstractContainer):
                # actually add the child to the parent in parent setter
                child.parent = self
        else:
            warn('Cannot add None as child to a container %s' % self.name)

    @classmethod
    def type_hierarchy(cls):
        """Return the method resolution order (``__mro__``) of this class."""
        return cls.__mro__

    @property
    def container_source(self):
        '''
        The source of this Container
        '''
        return self.__container_source

    @container_source.setter
    def container_source(self, source):
        # the source can only be assigned once
        if self.__container_source is not None:
            raise Exception('cannot reassign container_source')
        self.__container_source = source

    @property
    def parent(self):
        '''
        The parent Container of this Container
        '''
        # do it this way because __parent may not exist yet (not set in constructor)
        return getattr(self, '_AbstractContainer__parent', None)

    @parent.setter
    def parent(self, parent_container):
        if self.parent is parent_container:
            return

        if self.parent is not None:
            if isinstance(self.parent, AbstractContainer):
                raise ValueError(('Cannot reassign parent to Container: %s. '
                                  'Parent is already: %s.' % (repr(self), repr(self.parent))))
            else:
                if parent_container is None:
                    raise ValueError("Got None for parent of '%s' - cannot overwrite Proxy with NoneType" % repr(self))
                # NOTE this assumes isinstance(parent_container, Proxy) but we get a circular import
                # if we try to do that
                if self.parent.matches(parent_container):
                    self.__parent = parent_container
                    parent_container.__children.append(self)
                    parent_container.set_modified()
                else:
                    self.__parent.add_candidate(parent_container)
        else:
            self.__parent = parent_container
            if isinstance(parent_container, Container):
                parent_container.__children.append(self)
                parent_container.set_modified()
        for child in self.children:
            # used by hdmf.common.table.DynamicTableRegion to check for orphaned tables
            child._validate_on_set_parent()

    def _remove_child(self, child):
        """Remove a child Container. Intended for use in subclasses that allow dynamic addition of child Containers."""
        if not isinstance(child, AbstractContainer):
            raise ValueError('Cannot remove non-AbstractContainer object from children.')
        if child not in self.children:
            raise ValueError("%s '%s' is not a child of %s '%s'." % (child.__class__.__name__, child.name,
                                                                     self.__class__.__name__, self.name))
        child.__parent = None
        self.__children.remove(child)
        child.set_modified()
        self.set_modified()

    def reset_parent(self):
        """Reset the parent of this Container to None and remove the Container from the children of its parent.

        Use with caution. This can result in orphaned containers and broken links.

        :raises ValueError: If the current parent is set but is not an AbstractContainer.
        """
        if self.parent is None:
            return
        elif isinstance(self.parent, AbstractContainer):
            self.parent._remove_child(self)
        else:
            raise ValueError("Cannot reset parent when parent is not an AbstractContainer: %s" % repr(self.parent))

    def _validate_on_set_parent(self):
        """Validate this Container after setting the parent.

        This method is called by the parent setter. It can be overridden in subclasses to perform additional
        validation. The default implementation does nothing.
        """
        pass
class Container(AbstractContainer):
    """A container that can contain other containers and has special functionality for printing."""

    # Keys that a dict-style field specification is allowed to contain
    _pconf_allowed_keys = {'name', 'child', 'required_name', 'doc', 'settable'}

    @classmethod
    def _setter(cls, field):
        """Returns a list of setter functions for the given field to be added to the class during class declaration."""
        super_setter = AbstractContainer._setter(field)
        ret = [super_setter]
        # create setter with check for required name
        # the AbstractContainer that is passed to the setter must have name = required_name
        if field.get('required_name', None) is not None:
            required_name = field['required_name']
            idx1 = len(ret) - 1

            def container_setter(self, val):
                if val is not None:
                    if not isinstance(val, AbstractContainer):
                        msg = ("Field '%s' on %s has a required name and must be a subclass of AbstractContainer."
                               % (field['name'], self.__class__.__name__))
                        raise ValueError(msg)
                    if val.name != required_name:
                        msg = ("Field '%s' on %s must be named '%s'."
                               % (field['name'], self.__class__.__name__, required_name))
                        raise ValueError(msg)
                ret[idx1](self, val)  # call the previous setter

            ret.append(container_setter)

        # create setter that accepts a value or tuple, list, or dict or values and sets the value's parent to self
        if field.get('child', False):
            idx2 = len(ret) - 1

            def container_setter(self, val):
                ret[idx2](self, val)  # call the previous setter
                if val is not None:
                    if isinstance(val, (tuple, list)):
                        pass
                    elif isinstance(val, dict):
                        val = val.values()
                    else:
                        val = [val]
                    for v in val:
                        if not isinstance(v.parent, Container):
                            v.parent = self
                        else:
                            # the ObjectMapper will create a link from self (parent) to v (child with existing parent)
                            # still need to mark self as modified
                            self.set_modified()

            ret.append(container_setter)
        return ret[-1]  # return the last setter (which should call the previous setters, if applicable)

    def __repr__(self):
        """Return the name, class and id of this Container followed by a listing of its non-empty fields."""
        cls = self.__class__
        # format the id as hexadecimal so that it is consistent with the '0x' prefix
        template = "%s %s.%s at 0x%x" % (self.name, cls.__module__, cls.__name__, id(self))
        if len(self.fields):
            template += "\nFields:\n"
        for k in sorted(self.fields):  # sorted to enable tests
            v = self.fields[k]
            # if isinstance(v, DataIO) or not hasattr(v, '__len__') or len(v) > 0:
            if hasattr(v, '__len__'):
                if isinstance(v, (np.ndarray, list, tuple)):
                    # skip empty sequences
                    if len(v) > 0:
                        template += " {}: {}\n".format(k, self.__smart_str(v, 1))
                elif v:
                    template += " {}: {}\n".format(k, self.__smart_str(v, 1))
            else:
                template += " {}: {}\n".format(k, v)
        return template

    def _repr_html_(self):
        """Return an HTML representation of this Container (style, script, header and nested field listing)."""
        CSS_STYLE = """
        <style>
            .container-fields {
                font-family: "Open Sans", Arial, sans-serif;
            }
            .container-fields .field-value {
                color: #00788E;
            }
            .container-fields details > summary {
                cursor: pointer;
                display: list-item;
            }
            .container-fields details > summary:hover {
                color: #0A6EAA;
            }
        </style>
        """

        JS_SCRIPT = """
        <script>
            function copyToClipboard(text) {
                navigator.clipboard.writeText(text).then(function() {
                    console.log('Copied to clipboard: ' + text);
                }, function(err) {
                    console.error('Could not copy text: ', err);
                });
            }

            document.addEventListener('DOMContentLoaded', function() {
                let fieldKeys = document.querySelectorAll('.container-fields .field-key');
                fieldKeys.forEach(function(fieldKey) {
                    fieldKey.addEventListener('click', function() {
                        let accessCode = fieldKey.getAttribute('title').replace('Access code: ', '');
                        copyToClipboard(accessCode);
                    });
                });
            });
        </script>
        """
        if self.name == self.__class__.__name__:
            header_text = self.name
        else:
            header_text = f"{self.name} ({self.__class__.__name__})"
        html_repr = CSS_STYLE
        html_repr += JS_SCRIPT
        html_repr += "<div class='container-wrap'>"
        html_repr += (
            f"<div class='container-header'><div class='xr-obj-type'><h3>{header_text}</h3></div></div>"
        )
        html_repr += self._generate_html_repr(self.fields)
        html_repr += "</div>"
        return html_repr

    def _generate_html_repr(self, fields, level=0, access_code=".fields"):
        """Recursively render ``fields`` as HTML.

        :param fields: a dict, list or numpy array to render; any other type renders as an empty string
        :param level: nesting depth, used to compute the left margin (20px per level)
        :param access_code: the code snippet for accessing ``fields``, placed in the ``title`` attributes
        :returns: the HTML string
        """
        # NOTE(review): keys and values are interpolated into the markup without HTML escaping.
        html_repr = ""

        if isinstance(fields, dict):
            for key, value in fields.items():
                current_access_code = f"{access_code}['{key}']"
                if (
                    isinstance(value, (list, dict, np.ndarray))
                    or hasattr(value, "fields")
                ):
                    # nested value: render as a collapsible section
                    label = key
                    if isinstance(value, dict):
                        label += f" ({len(value)})"

                    html_repr += (
                        f'<details><summary style="display: list-item; margin-left: {level * 20}px;" '
                        f'class="container-fields field-key" title="{current_access_code}"><b>{label}</b></summary>'
                    )
                    if hasattr(value, "fields"):
                        value = value.fields
                        current_access_code = current_access_code + ".fields"
                    html_repr += self._generate_html_repr(
                        value, level + 1, current_access_code
                    )
                    html_repr += "</details>"
                else:
                    html_repr += (
                        f'<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-key"'
                        f' title="{current_access_code}">{key}:</span> <span class="field-value">{value}</span></div>'
                    )
        elif isinstance(fields, list):
            for index, item in enumerate(fields):
                current_access_code = f"{access_code}[{index}]"
                html_repr += (
                    f'<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-value"'
                    f' title="{current_access_code}">{str(item)}</span></div>'
                )
        elif isinstance(fields, np.ndarray):
            str_ = str(fields).replace("\n", "</br>")
            html_repr += (
                f'<div style="margin-left: {level * 20}px;" class="container-fields">{str_}</div>'
            )

        return html_repr

    @staticmethod
    def __smart_str(v, num_indent):
        """
        Print compact string representation of data.

        If v is a list, try to print it using numpy. This will condense the string
        representation of datasets with many elements. If that doesn't work, just print the list.

        If v is a dictionary, print the name and type of each element

        If v is a set, print it sorted

        If v is a neurodata_type, print the name of type

        Otherwise, use the built-in str()

        Parameters
        ----------
        v
            the value to format
        num_indent
            the current indentation level (two spaces per level)

        Returns
        -------
        str

        """

        if isinstance(v, (list, tuple)):
            if len(v) and isinstance(v[0], AbstractContainer):
                return Container.__smart_str_list(v, num_indent, '(')
            try:
                return str(np.asarray(v))
            except ValueError:
                return Container.__smart_str_list(v, num_indent, '(')
        elif isinstance(v, dict):
            return Container.__smart_str_dict(v, num_indent)
        elif isinstance(v, set):
            return Container.__smart_str_list(sorted(v), num_indent, '{')
        elif isinstance(v, AbstractContainer):
            return "{} {}".format(getattr(v, 'name'), type(v))
        else:
            return str(v)

    @staticmethod
    def __smart_str_list(str_list, num_indent, left_br):
        """Format the items of a sequence one per line between ``left_br`` ('(' or '{') and its closing bracket."""
        # look up the closing bracket; an unsupported opening bracket fails here with a KeyError
        right_br = {'(': ')', '{': '}'}[left_br]
        if len(str_list) == 0:
            return left_br + ' ' + right_br
        indent = num_indent * 2 * ' '
        indent_in = (num_indent + 1) * 2 * ' '
        out = left_br
        for v in str_list[:-1]:
            out += '\n' + indent_in + Container.__smart_str(v, num_indent + 1) + ','
        # the last item is not followed by a comma
        out += '\n' + indent_in + Container.__smart_str(str_list[-1], num_indent + 1)
        out += '\n' + indent + right_br
        return out

    @staticmethod
    def __smart_str_dict(d, num_indent):
        """Format a dict as its sorted keys, one per line, each followed by the type of its value."""
        left_br = '{'
        right_br = '}'
        if len(d) == 0:
            return left_br + ' ' + right_br
        indent = num_indent * 2 * ' '
        indent_in = (num_indent + 1) * 2 * ' '
        out = left_br
        keys = sorted(d.keys())
        for k in keys[:-1]:
            out += '\n' + indent_in + Container.__smart_str(k, num_indent + 1) + ' ' + str(type(d[k])) + ','
        # the last key is not followed by a comma
        out += '\n' + indent_in + Container.__smart_str(keys[-1], num_indent + 1) + ' ' + str(type(d[keys[-1]]))
        out += '\n' + indent + right_br
        return out

    def set_data_io(self, dataset_name, data_io_class, **kwargs):
        """Replace the value of the field ``dataset_name`` with ``data_io_class(data=<current value>, **kwargs)``.

        :raises ValueError: If the field is not set (i.e., its value is None).
        """
        data = self.fields.get(dataset_name)
        if data is None:
            raise ValueError(f"{dataset_name} is None and cannot be wrapped in a DataIO class")
        self.fields[dataset_name] = data_io_class(data=data, **kwargs)
class Data(AbstractContainer):
    """
    A class for representing dataset containers
    """

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'},
            {'name': 'data', 'type': ('scalar_data', 'array_data', 'data'), 'doc': 'the source of the data'},
            {'name': 'term_set', 'type': TermSet, 'doc': 'the set of terms used to validate data on add',
             'default': None})
    def __init__(self, **kwargs):
        data = popargs('data', kwargs)
        self.term_set = popargs('term_set', kwargs)
        super().__init__(**kwargs)
        if self.term_set is not None:
            # validate every term exactly once and collect the ones that are rejected
            bad_data = [term for term in data if not self.term_set.validate(term=term)]
            if bad_data:
                msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data]))
                raise ValueError(msg)
        self.__data = data

    @property
    def data(self):
        """The data held by this object."""
        return self.__data

    @property
    def shape(self):
        """
        Get the shape of the data represented by this container
        :return: Shape tuple
        :rtype: tuple of ints
        """
        return get_data_shape(self.__data)

    @docval({'name': 'dataio', 'type': DataIO, 'doc': 'the DataIO to apply to the data held by this Data'})
    def set_dataio(self, **kwargs):
        """
        Apply DataIO object to the data held by this Data object
        """
        dataio = getargs('dataio', kwargs)
        # the DataIO wraps the current data and then takes its place
        dataio.data = self.__data
        self.__data = dataio

    @docval({'name': 'func', 'type': types.FunctionType, 'doc': 'a function to transform *data*'})
    def transform(self, **kwargs):
        """
        Transform data from the current underlying state.

        This function can be used to permanently load data from disk, or convert to a different
        representation, such as a torch.Tensor

        :returns: this object, to allow chaining
        """
        func = getargs('func', kwargs)
        self.__data = func(self.__data)
        return self

    def __bool__(self):
        """Return False if the data is None, an empty array/tuple/list, or otherwise falsy."""
        if self.data is not None:
            if isinstance(self.data, (np.ndarray, tuple, list)):
                return len(self.data) != 0
            if self.data:
                return True
        return False

    def __len__(self):
        return len(self.__data)

    def __getitem__(self, args):
        return self.get(args)

    def get(self, args):
        """Return the element(s) of the data selected by ``args``."""
        if isinstance(self.data, (tuple, list)) and isinstance(args, (tuple, list, np.ndarray)):
            # tuples and lists do not support selection with a sequence of indices
            return [self.data[i] for i in args]
        if isinstance(self.data, h5py.Dataset) and isinstance(args, np.ndarray):
            # This is needed for h5py 2.9 compatibility
            args = args.tolist()
        return self.data[args]

    def append(self, arg):
        """Append ``arg`` to the data.

        :raises ValueError: If a term set is configured and ``arg`` does not validate against it.
        """
        if self.term_set is None or self.term_set.validate(term=arg):
            self.__data = append_data(self.__data, arg)
        else:
            msg = ('"%s" is not in the term set.' % arg)
            raise ValueError(msg)

    def extend(self, arg):
        """
        The extend_data method adds all the elements of the iterable arg to the
        end of the data of this Data container.

        If a term set is configured, all items are validated before any item is added, so the
        data is left unchanged when at least one item is rejected.

        :param arg: The iterable to add to the end of this VectorData
        :raises ValueError: If a term set is configured and any item does not validate against it.
        """
        if self.term_set is None:
            self.__data = extend_data(self.__data, arg)
            return

        items = list(arg)  # materialize once so that one-shot iterables can be validated and then added
        bad_data = [item for item in items if not self.term_set.validate(term=item)]
        if bad_data:
            msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data]))
            raise ValueError(msg)
        for item in items:
            # go through append so that overrides in subclasses are honored
            self.append(item)
class DataRegion(Data):
    """Abstract interface for a region of a target dataset; subclasses provide ``data`` and ``region``."""

    @property
    @abstractmethod
    def data(self):
        """
        The target data that this region applies to
        """

    @property
    @abstractmethod
    def region(self):
        """
        The region that indexes into data e.g. slice or list of indices
        """
class MultiContainerInterface(Container):
    """Class that dynamically defines methods to support a Container holding multiple Containers of the same type.

    To use, extend this class and set the class attribute ``__clsconf__`` to a dictionary (or a list of
    dictionaries, one per stored attribute) with any of the following keys:
    * 'attr' to name the attribute that stores the Container instances
    * 'type' to provide the Container object type (type or list/tuple of types, type can be a docval macro)
    * 'add' to name the method for adding Container instances
    * 'get' to name the method for getting Container instances
    * 'create' to name the method for creating Container instances (only if a single type is specified)

    If the attribute does not exist in the class, it will be generated. If it does exist, it should behave like a dict.

    The keys 'attr', 'type', and 'add' are required.

    ``__getitem__`` is generated only when a single configuration is given, and ``__init__`` is generated only
    when the subclass does not define its own.
    """

    def __new__(cls, *args, **kwargs):
        """Refuse to instantiate this base class or a subclass that lacks ``__clsconf__``.

        :raises TypeError: if ``cls`` is MultiContainerInterface itself or has no ``__clsconf__`` attribute
        """
        if cls is MultiContainerInterface:
            raise TypeError("Can't instantiate class MultiContainerInterface.")
        if not hasattr(cls, '__clsconf__'):
            raise TypeError("MultiContainerInterface subclass %s is missing __clsconf__ attribute. Please check that "
                            "the class is properly defined." % cls.__name__)
        return super().__new__(cls, *args, **kwargs)

    @staticmethod
    def __add_article(noun):
        """Return ``noun`` prefixed with 'a' or 'an'.

        ``noun`` may be text, a class, or a list/tuple of those, in which case the first element is used.
        """
        # accept lists as well as tuples, matching what __join accepts
        if isinstance(noun, (list, tuple)):
            noun = noun[0]
        if isinstance(noun, type):
            noun = noun.__name__
        if noun[0] in 'aeiouAEIOU':
            return 'an %s' % noun
        return 'a %s' % noun

    @staticmethod
    def __join(argtype):
        """Return a grammatical string representation of a list or tuple of classes or text.

        Examples:
            cls.__join(Container) returns "Container"
            cls.__join((Container, )) returns "Container"
            cls.__join((Container, Data)) returns "Container or Data"
            cls.__join((Container, Data, Subcontainer)) returns "Container, Data, or Subcontainer"
        """

        def tostr(x):
            return x.__name__ if isinstance(x, type) else x

        if not isinstance(argtype, (list, tuple)):
            return tostr(argtype)

        names = [tostr(x) for x in argtype]
        if len(names) <= 2:
            # zero, one or two names need no Oxford comma
            return " or ".join(names)
        return ", ".join(names[:-1]) + ', or ' + names[-1]

    @classmethod
    def __make_get(cls, func_name, attr_name, container_type):
        """Build the named getter method that looks up a stored container by name.

        The generated method returns the only stored container when no name is given. It raises ValueError
        when no name is given and the attribute holds zero or several containers, and KeyError when the
        lookup yields nothing.
        """
        doc = "Get %s from this %s" % (cls.__add_article(container_type), cls.__name__)

        @docval({'name': 'name', 'type': str, 'doc': 'the name of the %s' % cls.__join(container_type),
                 'default': None},
                rtype=container_type, returns='the %s with the given name' % cls.__join(container_type),
                func_name=func_name, doc=doc)
        def _func(self, **kwargs):
            name = getargs('name', kwargs)
            d = getattr(self, attr_name)
            if name is None:
                if len(d) > 1:
                    msg = ("More than one element in %s of %s '%s' -- must specify a name."
                           % (attr_name, cls.__name__, self.name))
                    raise ValueError(msg)
                if len(d) == 0:
                    msg = "%s of %s '%s' is empty." % (attr_name, cls.__name__, self.name)
                    raise ValueError(msg)
                # exactly one item in the dict
                ret = next(iter(d.values()))
            else:
                ret = d.get(name)
            if ret is None:
                msg = "'%s' not found in %s of %s '%s'." % (name, attr_name, cls.__name__, self.name)
                raise KeyError(msg)
            return ret

        return _func

    @classmethod
    def __make_getitem(cls, attr_name, container_type):
        """Build ``__getitem__``; same lookup as the named getter but with different error messages."""
        doc = "Get %s from this %s" % (cls.__add_article(container_type), cls.__name__)

        @docval({'name': 'name', 'type': str, 'doc': 'the name of the %s' % cls.__join(container_type),
                 'default': None},
                rtype=container_type, returns='the %s with the given name' % cls.__join(container_type),
                func_name='__getitem__', doc=doc)
        def _func(self, **kwargs):
            name = getargs('name', kwargs)
            d = getattr(self, attr_name)
            if name is None:
                if len(d) > 1:
                    msg = ("More than one %s in %s '%s' -- must specify a name."
                           % (cls.__join(container_type), cls.__name__, self.name))
                    raise ValueError(msg)
                if len(d) == 0:
                    msg = "%s '%s' is empty." % (cls.__name__, self.name)
                    raise ValueError(msg)
                # exactly one item in the dict
                ret = next(iter(d.values()))
            else:
                ret = d.get(name)
            if ret is None:
                msg = "'%s' not found in %s '%s'." % (name, cls.__name__, self.name)
                raise KeyError(msg)
            return ret

        return _func

    @classmethod
    def __make_add(cls, func_name, attr_name, container_type):
        """Build the add method that stores one or several containers under their names.

        The generated method accepts a single container, a list/tuple of containers, or a dict whose values
        are containers, and returns its argument unchanged. It raises ValueError if a container's name is
        already present in the attribute.
        """
        doc = "Add one or multiple %s objects to this %s" % (cls.__join(container_type), cls.__name__)

        @docval({'name': attr_name, 'type': (list, tuple, dict, container_type),
                 'doc': 'one or multiple %s objects to add to this %s' % (cls.__join(container_type), cls.__name__)},
                func_name=func_name, doc=doc)
        def _func(self, **kwargs):
            container = getargs(attr_name, kwargs)
            if isinstance(container, container_type):
                containers = [container]
            elif isinstance(container, dict):
                containers = container.values()
            else:
                containers = container
            d = getattr(self, attr_name)
            for tmp in containers:
                # check for a name clash first so a rejected container does not get re-parented
                # and self is not marked as modified
                if tmp.name in d:
                    msg = "'%s' already exists in %s '%s'" % (tmp.name, cls.__name__, self.name)
                    raise ValueError(msg)
                if not isinstance(tmp.parent, Container):
                    tmp.parent = self
                else:
                    # the ObjectMapper will create a link from self (parent) to tmp (child with existing parent)
                    # still need to mark self as modified
                    self.set_modified()
                d[tmp.name] = tmp
            return container

        return _func

    @classmethod
    def __make_create(cls, func_name, add_name, container_type):
        """Build the create method that constructs a ``container_type`` and hands it to the add method."""
        doc = "Create %s object and add it to this %s" % (cls.__add_article(container_type), cls.__name__)

        @docval(*get_docval(container_type.__init__), func_name=func_name, doc=doc,
                returns="the %s object that was created" % cls.__join(container_type), rtype=container_type)
        def _func(self, **kwargs):
            ret = container_type(**kwargs)
            getattr(self, add_name)(ret)
            return ret

        return _func

    @classmethod
    def __make_constructor(cls, clsconf):
        """Build an ``__init__`` taking one optional argument per configured attribute plus ``name``."""
        args = []
        for conf in clsconf:
            attr_name = conf['attr']
            container_type = conf['type']
            args.append({'name': attr_name, 'type': (list, tuple, dict, container_type),
                         'doc': '%s to store in this interface' % cls.__join(container_type), 'default': dict()})

        args.append({'name': 'name', 'type': str, 'doc': 'the name of this container', 'default': cls.__name__})

        @docval(*args, func_name='__init__')
        def _func(self, **kwargs):
            # zero-argument super() resolves relative to MultiContainerInterface because this function
            # is defined inside this class body
            super().__init__(name=kwargs['name'])
            for conf in clsconf:
                attr_name = conf['attr']
                add_name = conf['add']
                container = popargs(attr_name, kwargs)
                add = getattr(self, add_name)
                add(container)

        return _func

    @classmethod
    def __make_getter(cls, attr):
        """Make a getter function for creating a :py:func:`property`"""

        def _func(self):
            # initialize the field to an empty labeled dict if it has not yet been
            # do this here to avoid creating default __init__ which may or may not be overridden in
            # custom classes and dynamically generated classes
            if attr not in self.fields:
                def _remove_child(child):
                    if child.parent is self:
                        self._remove_child(child)
                self.fields[attr] = LabelledDict(attr, remove_callable=_remove_child)

            return self.fields.get(attr)

        return _func

    @classmethod
    def __make_setter(cls, add_name):
        """Make a setter function for creating a :py:func:`property`"""

        @docval({'name': 'val', 'type': (list, tuple, dict), 'doc': 'the sub items to add', 'default': None})
        def _func(self, **kwargs):
            val = getargs('val', kwargs)
            if val is None:
                return
            # assignment delegates to the add method rather than replacing the stored dict
            getattr(self, add_name)(val)

        return _func

    @ExtenderMeta.pre_init
    def __build_class(cls, name, bases, classdict):
        """Verify __clsconf__ and create methods based on __clsconf__.
        This method is called prior to __new__ and __init__ during class declaration in the metaclass.

        :raises TypeError: if ``__clsconf__`` is neither a dict nor a list
        """
        if not hasattr(cls, '__clsconf__'):
            return

        multi = False
        if isinstance(cls.__clsconf__, dict):
            clsconf = [cls.__clsconf__]
        elif isinstance(cls.__clsconf__, list):
            multi = True
            clsconf = cls.__clsconf__
        else:
            raise TypeError("'__clsconf__' for MultiContainerInterface subclass %s must be a dict or a list of "
                            "dicts." % cls.__name__)

        for conf_index, conf_dict in enumerate(clsconf):
            cls.__build_conf_methods(conf_dict, conf_index, multi)

        # make __getitem__ (square bracket access) only if one conf type is defined
        if len(clsconf) == 1:
            attr = clsconf[0].get('attr')
            container_type = clsconf[0].get('type')
            setattr(cls, '__getitem__', cls.__make_getitem(attr, container_type))

        # create the constructor, only if it has not been overridden
        # i.e. it is the same method as the parent class constructor
        if '__init__' not in cls.__dict__:
            setattr(cls, '__init__', cls.__make_constructor(clsconf))

    @classmethod
    def __build_conf_methods(cls, conf_dict, conf_index, multi):
        """Create the property and the add/create/get methods described by one ``__clsconf__`` entry.

        :param conf_dict: a single configuration dictionary
        :param conf_index: position of ``conf_dict`` in ``__clsconf__``; only used in error messages
        :param multi: whether ``__clsconf__`` is a list, in which case errors mention ``conf_index``
        :raises ValueError: if a required key is missing, or 'create' is given without a single type
        """

        def fail(msg):
            if multi:
                msg += " at index %d" % conf_index
            raise ValueError(msg)

        # required keys, checked in this order: add method name, attribute name, container type
        for key in ('add', 'attr', 'type'):
            if conf_dict.get(key) is None:
                fail("MultiContainerInterface subclass %s is missing '%s' key in __clsconf__" % (cls.__name__, key))
        add = conf_dict['add']
        attr = conf_dict['attr']
        container_type = conf_dict['type']

        # create property with the name given in 'attr' only if the attribute is not already defined
        if not hasattr(cls, attr):
            getter = cls.__make_getter(attr)
            setter = cls.__make_setter(add)
            doc = "a dictionary containing the %s in this %s" % (cls.__join(container_type), cls.__name__)
            setattr(cls, attr, property(getter, setter, None, doc))

        # create the add method
        setattr(cls, add, cls.__make_add(add, attr, container_type))

        # create the create method, only if a single container type is specified
        create = conf_dict.get('create')
        if create is not None:
            if not isinstance(container_type, type):
                fail(("Cannot specify 'create' key in __clsconf__ for MultiContainerInterface subclass %s "
                      "when 'type' key is not a single type") % cls.__name__)
            setattr(cls, create, cls.__make_create(create, add, container_type))

        # create the get method
        get = conf_dict.get('get')
        if get is not None:
            setattr(cls, get, cls.__make_get(get, attr, container_type))
# NOTE: keep the explicit ``object`` base -- __build_row_class compares ``bases`` against ``(object,)``
class Row(object, metaclass=ExtenderMeta):
    """
    A class for representing rows from a Table.

    The Table class can be indicated with the __table__. Doing so
    will set constructor arguments for the Row class and ensure that
    Row.idx is set appropriately when a Row is added to the Table. It will
    also add functionality to the Table class for getting Row objects.

    Note, the Row class is not needed for working with Table objects. This
    is merely convenience functionality for working with Tables.
    """

    __table__ = None

    @property
    def idx(self):
        """The index of this row in its respective Table"""
        return self.__idx

    @idx.setter
    def idx(self, val):
        # the index may be assigned only once
        if self.__idx is not None:
            raise ValueError("cannot reset the ID of a row object")
        self.__idx = val

    @property
    def table(self):
        """The Table this Row comes from"""
        return self.__table

    @table.setter
    def table(self, val):
        # Without a table there is nothing to add the row to. Previously the row was added
        # regardless, which raised AttributeError on None when neither table nor idx was given.
        if val is None:
            return
        self.__table = val
        if self.idx is None:
            # a row without an index is new to the table: add it and remember where it landed
            self.idx = self.__table.add_row(**self.todict())

    @ExtenderMeta.pre_init
    def __build_row_class(cls, name, bases, classdict):
        """Generate ``__init__`` and ``todict`` from the columns of ``__table__`` and register ``cls`` on it.

        ``__init__`` and ``todict`` are only generated when ``cls`` does not override ``__init__``.

        :raises ValueError: if a subclass of Row does not set ``__table__``
        """
        table_cls = getattr(cls, '__table__', None)
        if table_cls is None:
            if bases != (object,):
                raise ValueError('__table__ must be set if sub-classing Row')
            return

        columns = getattr(table_cls, '__columns__')
        if cls.__init__ == bases[-1].__init__:  # check if __init__ is overridden
            # copy so the column specs of the table class are not shared with this docval
            func_args = list(deepcopy(columns))
            func_args.append({'name': 'table', 'type': Table, 'default': None,
                              'help': 'the table this row is from'})
            func_args.append({'name': 'idx', 'type': int, 'default': None,
                              'help': 'the index for this row'})

            @docval(*func_args)
            def __init__(self, **kwargs):
                super(cls, self).__init__()
                table, idx = popargs('table', 'idx', kwargs)
                self.__keys = list()
                self.__idx = None
                self.__table = None
                # everything left in kwargs is a column value
                for k, v in kwargs.items():
                    self.__keys.append(k)
                    setattr(self, k, v)
                # idx must be set before table: the table setter adds the row only if idx is still None
                self.idx = idx
                self.table = table

            setattr(cls, '__init__', __init__)

            def todict(self):
                return {k: getattr(self, k) for k in self.__keys}

            setattr(cls, 'todict', todict)

        # set this so Table.row gets set when a Table is instantiated
        table_cls.__rowclass__ = cls

    def __eq__(self, other):
        """Rows are equal when they have the same index and come from the very same Table object."""
        if not isinstance(other, Row):
            return NotImplemented
        return self.idx == other.idx and self.table is other.table

    def __str__(self):
        return "Row(%i, %s) = %s" % (self.idx, self.table.name, str(self.todict()))
class RowGetter:
    """
    Helper that gives a Table square-bracket access returning Row objects.

    Every Row that is built is remembered, so asking for the same index
    again returns the same object.
    """

    def __init__(self, table):
        self.cache = {}
        self.table = table

    def __getitem__(self, idx):
        cached = self.cache.get(idx)
        if cached is not None:
            return cached
        # not seen yet: read the raw row and wrap it in the table's Row class
        values = self.table[idx]
        built = self.table.__rowclass__(*values, table=self.table, idx=idx)
        self.cache[idx] = built
        return built
class Table(Data):
    r'''
    Subclasses should specify the class attribute \_\_columns\_\_.

    This should be a list of dictionaries with the following keys:

    - ``name`` the column name
    - ``type`` the type of data in this column
    - ``doc`` a brief description of what gets stored in this column

    For reference, this list of dictionaries will be used with docval to autogenerate
    the ``add_row`` method for adding data to this table.

    If \_\_columns\_\_ is not specified, no custom ``add_row`` method will be added.

    The class attribute \_\_defaultname\_\_ can also be set to specify a default name
    for the table class. If \_\_defaultname\_\_ is not specified, then ``name`` will
    need to be specified when the class is instantiated.

    A Table class can be paired with a Row class for conveniently working with rows of
    a Table. This pairing must be indicated in the Row class implementation. See Row
    for more details.
    '''

    # This class attribute is used to indicate which Row class should be used when
    # adding RowGetter functionality to the Table.
    __rowclass__ = None

    @ExtenderMeta.pre_init
    def __build_table_class(cls, name, bases, classdict):
        """Generate ``__colidx__``, ``__init__`` and ``add_row`` for classes that define ``__columns__``.

        ``__init__`` and ``add_row`` are only generated when ``cls`` does not override them.
        """
        if not hasattr(cls, '__columns__'):
            return
        columns = getattr(cls, '__columns__')

        # map each column name to its position within a row
        setattr(cls, '__colidx__', {col['name']: i for i, col in enumerate(columns)})

        if cls.__init__ == bases[-1].__init__:  # check if __init__ is overridden
            name_arg = {'name': 'name', 'type': str, 'doc': 'the name of this table'}
            defname = getattr(cls, '__defaultname__', None)
            if defname is not None:
                name_arg['default'] = defname  # override the name with the default name if present

            @docval(name_arg,
                    {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'the data in this table',
                     'default': list()})
            def __init__(self, **kwargs):
                name, data = getargs('name', 'data', kwargs)
                colnames = [i['name'] for i in columns]
                super(cls, self).__init__(colnames, name, data)

            setattr(cls, '__init__', __init__)

        if cls.add_row == bases[-1].add_row:  # check if add_row is overridden

            @docval(*columns)
            def add_row(self, **kwargs):
                # the generated method takes one keyword per column and forwards them as a dict
                return super(cls, self).add_row(kwargs)

            setattr(cls, 'add_row', add_row)

    @docval({'name': 'columns', 'type': (list, tuple), 'doc': 'a list of the columns in this table'},
            {'name': 'name', 'type': str, 'doc': 'the name of this container'},
            {'name': 'data', 'type': ('array_data', 'data'), 'doc': 'the source of the data', 'default': list()})
    def __init__(self, **kwargs):
        self.__columns = tuple(popargs('columns', kwargs))
        self.__col_index = {name: idx for idx, name in enumerate(self.__columns)}
        if getattr(self, '__rowclass__') is not None:
            # a Row class was paired with this table: expose Row objects through ``self.row[idx]``
            self.row = RowGetter(self)
        super().__init__(**kwargs)

    @property
    def columns(self):
        """The column names of this table, as a tuple"""
        return self.__columns

    @docval({'name': 'values', 'type': dict, 'doc': 'the values for each column'})
    def add_row(self, **kwargs):
        """Append a row and return its index.

        Values that are Row objects are stored as their ``idx``.

        :raises ValueError: if the data of this table is not a list
        :raises KeyError: if ``values`` lacks one of the columns
        """
        values = getargs('values', kwargs)
        if not isinstance(self.data, list):
            msg = 'Cannot append row to %s' % type(self.data)
            raise ValueError(msg)
        ret = len(self.data)
        row = [values[col] for col in self.columns]
        row = [v.idx if isinstance(v, Row) else v for v in row]
        self.data.append(tuple(row))
        return ret

    def which(self, **kwargs):
        '''
        Query a table

        Takes exactly one ``column=value`` keyword and returns the list of row indices
        whose value in that column equals ``value``.

        :raises ValueError: if the number of keywords is not exactly one
        :raises KeyError: if the column is not in ``__colidx__``
        '''
        if len(kwargs) != 1:
            raise ValueError("only one column can be queried")
        colname, value = kwargs.popitem()
        idx = self.__colidx__.get(colname)
        if idx is None:
            msg = "no '%s' column in %s" % (colname, self.__class__.__name__)
            raise KeyError(msg)
        data = self.data
        return [i for i in range(len(data)) if data[i][idx] == value]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, args):
        """Select from the table.

        - ``table[(col, idx)]`` returns one cell; ``col`` is a column name or a column position
        - ``table[name]`` with a string returns that column as a list
        - anything else is used to index the underlying data directly

        :raises KeyError: if a column name is unknown or ``col`` is neither a string nor an int
        """
        if isinstance(args, tuple):
            col_key, idx = args[0], args[1]
            if isinstance(col_key, str):
                col = self.__col_index.get(col_key)
                if col is None:
                    # unknown names must not reach the row as ``None`` (TypeError, or newaxis on arrays)
                    raise KeyError(col_key)
            elif isinstance(col_key, int):
                col = col_key
            else:
                raise KeyError('first argument must be a column name or index')
            return self.data[idx][col]
        if isinstance(args, str):
            col = self.__col_index.get(args)
            if col is None:
                raise KeyError(args)
            return [row[col] for row in self.data]
        return self.data[args]

    def to_dataframe(self):
        '''Produce a pandas DataFrame containing this table's data.
        '''
        data = {colname: self[colname] for colname in self.columns}
        return pd.DataFrame(data)

    @classmethod
    @docval(
        {'name': 'df', 'type': pd.DataFrame, 'doc': 'input data'},
        {'name': 'name', 'type': str, 'doc': 'the name of this container', 'default': None},
        {
            'name': 'extra_ok',
            'type': bool,
            'doc': 'accept (and ignore) unexpected columns on the input dataframe',
            'default': False
        },
    )
    def from_dataframe(cls, **kwargs):
        '''Construct an instance of Table (or a subclass) from a pandas DataFrame. The columns of the dataframe
        should match the columns defined on the Table subclass.

        A single missing column is taken from the dataframe index when the index carries that column's name.
        Columns of the dataframe that the class does not define raise ValueError unless ``extra_ok`` is True,
        in which case they are ignored. Rows are stored as tuples.

        :raises ValueError: on unrecognized columns (unless ``extra_ok``) or on missing columns
        '''
        df, name, extra_ok = getargs('df', 'name', 'extra_ok', kwargs)

        cls_cols = [col['name'] for col in getattr(cls, '__columns__')]
        df_cols = list(df.columns)

        missing_columns = set(cls_cols) - set(df_cols)
        extra_columns = set(df_cols) - set(cls_cols)

        # honor extra_ok: extra columns are harmless because rows are built from cls_cols only
        if extra_columns and not extra_ok:
            raise ValueError(
                'unrecognized column(s) {} for table class {} (columns {})'.format(
                    extra_columns, cls.__name__, cls_cols
                )
            )

        use_index = False
        if len(missing_columns) == 1 and list(missing_columns)[0] == df.index.name:
            use_index = True
        elif missing_columns:
            raise ValueError(
                'missing column(s) {} for table class {} (columns {}, provided {})'.format(
                    missing_columns, cls.__name__, cls_cols, df_cols
                )
            )

        index_name = df.index.name
        data = []
        for index, row in df.iterrows():
            data.append(tuple(
                index if use_index and colname == index_name else row[colname]
                for colname in cls_cols
            ))

        if name is None:
            return cls(data=data)
        return cls(name=name, data=data)