Coverage for src/hdmf/common/alignedtable.py: 99%
171 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-07-10 23:48 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-07-10 23:48 +0000
1"""
2Collection of Container classes for interacting with aligned and hierarchical dynamic tables
3"""
4from collections import OrderedDict
6import numpy as np
7import pandas as pd
9from . import register_class
10from .table import DynamicTable
11from ..utils import docval, getargs, popargs, get_docval, AllowPositional
14@register_class('AlignedDynamicTable')
15class AlignedDynamicTable(DynamicTable):
16 """
17 DynamicTable container that supports storing a collection of subtables. Each sub-table is a
18 DynamicTable itself that is aligned with the main table by row index. I.e., all
19 DynamicTables stored in this group MUST have the same number of rows. This type effectively
20 defines a 2-level table in which the main data is stored in the main table implemented by this type
21 and additional columns of the table are grouped into categories, with each category being
22 represented by a separate DynamicTable stored within the group.
24 NOTE: To remain compatible with DynamicTable, the attribute colnames represents only the
25 columns of the main table (not including the category tables). To get the full list of
26 column names, use the get_colnames() function instead.
27 """
28 __fields__ = ({'name': 'category_tables', 'child': True}, )
30 @docval(*get_docval(DynamicTable.__init__),
31 {'name': 'category_tables', 'type': list,
32 'doc': 'List of DynamicTables to be added to the container. NOTE: Only regular '
33 'DynamicTables are allowed. Using AlignedDynamicTable as a category for '
34 'AlignedDynamicTable is currently not supported.', 'default': None},
35 {'name': 'categories', 'type': 'array_data',
36 'doc': 'List of names with the ordering of category tables', 'default': None},
37 allow_positional=AllowPositional.WARNING)
38 def __init__(self, **kwargs): # noqa: C901
39 in_category_tables = popargs('category_tables', kwargs)
40 in_categories = popargs('categories', kwargs)
41 if in_category_tables is not None:
42 # Error check to make sure that all category_table are regular DynamicTable
43 for i, v in enumerate(in_category_tables):
44 if not isinstance(v, DynamicTable):
45 raise ValueError("Category table with index %i is not a DynamicTable" % i)
46 if isinstance(v, AlignedDynamicTable):
47 raise ValueError("Category table with index %i is an AlignedDynamicTable. "
48 "Nesting of AlignedDynamicTable is currently not supported." % i)
49 # set in_categories from the in_category_tables if it is empty
50 if in_categories is None and in_category_tables is not None:
51 in_categories = [tab.name for tab in in_category_tables]
52 # check that if categories is given that we also have category_tables
53 if in_categories is not None and in_category_tables is None:
54 raise ValueError("Categories provided but no category_tables given")
55 # at this point both in_categories and in_category_tables should either both be None or both be a list
56 if in_categories is not None:
57 if len(in_categories) != len(in_category_tables):
58 raise ValueError("%s category_tables given but %s categories specified" %
59 (len(in_category_tables), len(in_categories)))
60 # Initialize the main dynamic table
61 super().__init__(**kwargs)
62 # Create and set all sub-categories
63 dts = OrderedDict()
64 # Add the custom categories given as inputs
65 if in_category_tables is not None:
66 # We may need to resize our main table when adding categories as the user may not have set ids
67 if len(in_category_tables) > 0: 67 ↛ 76line 67 didn't jump to line 76, because the condition on line 67 was never false
68 # We have categories to process
69 if len(self.id) == 0:
70 # The user did not initialize our main table id's nor set columns for our main table
71 for i in range(len(in_category_tables[0])):
72 self.id.append(i)
73 # Add the user-provided categories in the correct order as described by the categories
74 # This is necessary, because we do not store the categories explicitly but we maintain them
75 # as the order of our self.category_tables. In this makes sure look-ups are consistent.
76 lookup_index = OrderedDict([(k, -1) for k in in_categories])
77 for i, v in enumerate(in_category_tables):
78 # Error check that the name of the table is in our categories list
79 if v.name not in lookup_index:
80 raise ValueError("DynamicTable %s does not appear in categories %s" % (v.name, str(in_categories)))
81 # Error check to make sure no two tables with the same name are given
82 if lookup_index[v.name] >= 0:
83 raise ValueError("Duplicate table name %s found in input dynamic_tables" % v.name)
84 lookup_index[v.name] = i
85 for table_name, tabel_index in lookup_index.items():
86 # This error case should not be able to occur since the length of the in_categories and
87 # in_category_tables must match and we made sure that each DynamicTable we added had its
88 # name in the in_categories list. We, therefore, exclude this check from coverage testing
89 # but we leave it in just as a backup trigger in case something unexpected happens
90 if tabel_index < 0: # pragma: no cover
91 raise ValueError("DynamicTable %s listed in categories but does not appear in category_tables" %
92 table_name) # pragma: no cover
93 # Test that all category tables have the correct number of rows
94 category = in_category_tables[tabel_index]
95 if len(category) != len(self):
96 raise ValueError('Category DynamicTable %s does not align, it has %i rows expected %i' %
97 (category.name, len(category), len(self)))
98 # Add the category table to our category_tables.
99 dts[category.name] = category
100 # Set the self.category_tables attribute, which will set the parent/child relationships for the category_tables
101 self.category_tables = dts
103 def __contains__(self, val):
104 """
105 Check if the given value (i.e., column) exists in this table
107 :param val: If val is a string then check if the given category exists. If val is a tuple
108 of two strings (category, colname) then check for the given category if the given colname exists.
109 """
110 if isinstance(val, str):
111 return val in self.category_tables or val in self.colnames
112 elif isinstance(val, tuple):
113 if len(val) != 2:
114 raise ValueError("Expected tuple of strings of length 2 got tuple of length %i" % len(val))
115 return val[1] in self.get_category(val[0])
116 else:
117 return False
119 @property
120 def categories(self):
121 """
122 Get the list of names the categories
124 Short-hand for list(self.category_tables.keys())
126 :raises: KeyError if the given name is not in self.category_tables
127 """
128 return list(self.category_tables.keys())
130 @docval({'name': 'category', 'type': DynamicTable, 'doc': 'Add a new DynamicTable category'},)
131 def add_category(self, **kwargs):
132 """
133 Add a new DynamicTable to the AlignedDynamicTable to create a new category in the table.
135 NOTE: The table must align with (i.e, have the same number of rows as) the main data table (and
136 other category tables). I.e., if the AlignedDynamicTable is already populated with data
137 then we have to populate the new category with the corresponding data before adding it.
139 :raises: ValueError is raised if the input table does not have the same number of rows as the main table.
140 ValueError is raised if the table is an AlignedDynamicTable instead of regular DynamicTable.
141 """
142 category = getargs('category', kwargs)
143 if len(category) != len(self):
144 raise ValueError('New category DynamicTable does not align, it has %i rows expected %i' %
145 (len(category), len(self)))
146 if category.name in self.category_tables:
147 raise ValueError("Category %s already in the table" % category.name)
148 if isinstance(category, AlignedDynamicTable):
149 raise ValueError("Category is an AlignedDynamicTable. Nesting of AlignedDynamicTable "
150 "is currently not supported.")
151 self.category_tables[category.name] = category
152 category.parent = self
154 @docval({'name': 'name', 'type': str, 'doc': 'Name of the category we want to retrieve', 'default': None})
155 def get_category(self, **kwargs):
156 name = popargs('name', kwargs)
157 if name is None or (name not in self.category_tables and name == self.name):
158 return self
159 else:
160 return self.category_tables[name]
162 @docval(*get_docval(DynamicTable.add_column),
163 {'name': 'category', 'type': str, 'doc': 'The category the column should be added to',
164 'default': None})
165 def add_column(self, **kwargs):
166 """
167 Add a column to the table
169 :raises: KeyError if the category does not exist
171 """
172 category_name = popargs('category', kwargs)
173 if category_name is None:
174 # Add the column to our main table
175 super().add_column(**kwargs)
176 else:
177 # Add the column to a sub-category table
178 try:
179 category = self.get_category(category_name)
180 except KeyError:
181 raise KeyError("Category %s not in table" % category_name)
182 category.add_column(**kwargs)
184 @docval({'name': 'data', 'type': dict, 'doc': 'the data to put in this row', 'default': None},
185 {'name': 'id', 'type': int, 'doc': 'the ID for the row', 'default': None},
186 {'name': 'enforce_unique_id', 'type': bool, 'doc': 'enforce that the id in the table must be unique',
187 'default': False},
188 allow_extra=True)
189 def add_row(self, **kwargs):
190 """
191 We can either provide the row data as a single dict or by specifying a dict for each category
192 """
193 data, row_id, enforce_unique_id = popargs('data', 'id', 'enforce_unique_id', kwargs)
194 data = data if data is not None else kwargs
196 # extract the category data
197 category_data = {k: data.pop(k) for k in self.categories if k in data}
199 # Check that we have the approbriate categories provided
200 missing_categories = set(self.categories) - set(list(category_data.keys()))
201 if missing_categories:
202 raise KeyError(
203 '\n'.join([
204 'row data keys do not match available categories',
205 'missing {} category keys: {}'.format(len(missing_categories), missing_categories)
206 ])
207 )
208 # Add the data to our main dynamic table
209 data['id'] = row_id
210 data['enforce_unique_id'] = enforce_unique_id
211 super().add_row(**data)
213 # Add the data to all out dynamic table categories
214 for category, values in category_data.items():
215 self.category_tables[category].add_row(**values)
217 @docval({'name': 'include_category_tables', 'type': bool,
218 'doc': "Ignore sub-category tables and just look at the main table", 'default': False},
219 {'name': 'ignore_category_ids', 'type': bool,
220 'doc': "Ignore id columns of sub-category tables", 'default': False})
221 def get_colnames(self, **kwargs):
222 """Get the full list of names of columns for this table
224 :returns: List of tuples (str, str) where the first string is the name of the DynamicTable
225 that contains the column and the second string is the name of the column. If
226 include_category_tables is False, then a list of column names is returned.
227 """
228 if not getargs('include_category_tables', kwargs):
229 return self.colnames
230 else:
231 ignore_category_ids = getargs('ignore_category_ids', kwargs)
232 columns = [(self.name, c) for c in self.colnames]
233 for category in self.category_tables.values():
234 if not ignore_category_ids:
235 columns += [(category.name, 'id'), ]
236 columns += [(category.name, c) for c in category.colnames]
237 return columns
239 @docval({'name': 'ignore_category_ids', 'type': bool,
240 'doc': "Ignore id columns of sub-category tables", 'default': False})
241 def to_dataframe(self, **kwargs):
242 """Convert the collection of tables to a single pandas DataFrame"""
243 dfs = [super().to_dataframe().reset_index(), ]
244 if getargs('ignore_category_ids', kwargs):
245 dfs += [category.to_dataframe() for category in self.category_tables.values()]
246 else:
247 dfs += [category.to_dataframe().reset_index() for category in self.category_tables.values()]
248 names = [self.name, ] + list(self.category_tables.keys())
249 res = pd.concat(dfs, axis=1, keys=names)
250 res.set_index((self.name, 'id'), drop=True, inplace=True)
251 return res
253 def __getitem__(self, item):
254 """
255 Called to implement standard array slicing syntax.
257 Same as ``self.get(item)``. See :py:meth:`~hdmf.common.alignedtable.AlignedDynamicTable.get` for details.
258 """
259 return self.get(item)
261 def get(self, item, **kwargs):
262 """
263 Access elements (rows, columns, category tables etc.) from the table. Instead of calling
264 this function directly, the class also implements standard array slicing syntax
265 via :py:meth:`~hdmf.common.alignedtable.AlignedDynamicTable.__getitem__`
266 (which calls this function). For example, instead of calling
267 ``self.get(item=slice(2,5))`` we may use the often more convenient form of ``self[2:5]`` instead.
269 :param item: Selection defining the items of interest. This may be either a:
271 * **int, list, array, slice** : Return one or multiple row of the table as a pandas.DataFrame. For example:
272 * ``self[0]`` : Select the first row of the table
273 * ``self[[0,3]]`` : Select the first and fourth row of the table
274 * ``self[1:4]`` : Select the rows with index 1,2,3 from the table
276 * **string** : Return a column from the main table or a category table. For example:
277 * ``self['column']`` : Return the column from the main table.
278 * ``self['my_category']`` : Returns a DataFrame of the ``my_category`` category table.
279 This is a shorthand for ``self.get_category('my_category').to_dataframe()``.
281 * **tuple**: Get a column, row, or cell from a particular category table. The tuple is expected to
282 consist of the following elements:
284 * ``category``: string with the name of the category. To select from the main
285 table use ``self.name`` or ``None``.
286 * ``column``: string with the name of the column, and
287 * ``row``: integer index of the row.
289 The tuple itself then may take the following forms:
291 * Select a single column from a table via:
292 * ``self[category, column]``
293 * Select a single full row of a given category table via:
294 * ``self[row, category]`` (recommended, for consistency with DynamicTable)
295 * ``self[category, row]``
296 * Select a single cell via:
297 * ``self[row, (category, column)]`` (recommended, for consistency with DynamicTable)
298 * ``self[row, category, column]``
299 * ``self[category, column, row]``
301 :returns: Depending on the type of selection the function returns a:
303 * **pandas.DataFrame**: when retrieving a row or category table
304 * **array** : when retrieving a single column
305 * **single value** : when retrieving a single cell. The data type and shape will depend on the
306 data type and shape of the cell/column.
307 """
308 if isinstance(item, (int, list, np.ndarray, slice)):
309 # get a single full row from all tables
310 dfs = ([super().get(item, **kwargs).reset_index(), ] +
311 [category[item].reset_index() for category in self.category_tables.values()])
312 names = [self.name, ] + list(self.category_tables.keys())
313 res = pd.concat(dfs, axis=1, keys=names)
314 res.set_index((self.name, 'id'), drop=True, inplace=True)
315 return res
316 elif isinstance(item, str) or item is None:
317 if item in self.colnames:
318 # get a specific column
319 return super().get(item, **kwargs)
320 else:
321 # get a single category
322 return self.get_category(item).to_dataframe()
323 elif isinstance(item, tuple): 323 ↛ exitline 323 didn't return from function 'get', because the condition on line 323 was never false
324 if len(item) == 2:
325 # DynamicTable allows selection of cells via the syntax [int, str], i.e,. [row_index, columnname]
326 # We support this syntax here as well with the additional caveat that in AlignedDynamicTable
327 # columns are identified by tuples of strings. As such [int, str] refers not to a cell but
328 # a single row in a particular category table (i.e., [row_index, category]). To select a cell
329 # the second part of the item then is a tuple of strings, i.e., [row_index, (category, column)]
330 if isinstance(item[0], (int, np.integer)):
331 # Select a single cell or row of a sub-table based on row-index(item[0])
332 # and the category (if item[1] is a string) or column (if item[1] is a tuple of (category, column)
333 re = self[item[0]][item[1]]
334 # re is a pandas.Series or pandas.Dataframe. If we selected a single cell
335 # (i.e., item[2] was a tuple defining a particular column) then return the value of the cell
336 if re.size == 1:
337 re = re.values[0]
338 # If we selected a single cell from a ragged column then we need to change the list to a tuple
339 if isinstance(re, list):
340 re = tuple(re)
341 # We selected a row of a whole table (i.e., item[2] identified only the category table,
342 # but not a particular column).
343 # Change the result from a pandas.Series to a pandas.DataFrame for consistency with DynamicTable
344 if isinstance(re, pd.Series): 344 ↛ 345line 344 didn't jump to line 345, because the condition on line 344 was never true
345 re = re.to_frame()
346 return re
347 else:
348 return self.get_category(item[0])[item[1]]
349 elif len(item) == 3:
350 if isinstance(item[0], (int, np.integer)):
351 return self.get_category(item[1])[item[2]][item[0]]
352 else:
353 return self.get_category(item[0])[item[1]][item[2]]
354 else:
355 raise ValueError("Expected tuple of length 2 of the form [category, column], [row, category], "
356 "[row, (category, column)] or a tuple of length 3 of the form "
357 "[category, column, row], [row, category, column]")
359 @docval({'name': 'ignore_category_tables', 'type': bool,
360 'doc': "Ignore the category tables and only check in the main table columns", 'default': False},
361 allow_extra=False)
362 def has_foreign_columns(self, **kwargs):
363 """
364 Does the table contain DynamicTableRegion columns
366 :returns: True if the table or any of the category tables contains a DynamicTableRegion column, else False
367 """
368 ignore_category_tables = getargs('ignore_category_tables', kwargs)
369 if super().has_foreign_columns():
370 return True
371 if not ignore_category_tables:
372 for table in self.category_tables.values():
373 if table.has_foreign_columns():
374 return True
375 return False
377 @docval({'name': 'ignore_category_tables', 'type': bool,
378 'doc': "Ignore the category tables and only check in the main table columns", 'default': False},
379 allow_extra=False)
380 def get_foreign_columns(self, **kwargs):
381 """
382 Determine the names of all columns that link to another DynamicTable, i.e.,
383 find all DynamicTableRegion type columns. Similar to a foreign key in a
384 database, a DynamicTableRegion column references elements in another table.
386 :returns: List of tuples (str, str) where the first string is the name of the
387 category table (or None if the column is in the main table) and the
388 second string is the column name.
389 """
390 ignore_category_tables = getargs('ignore_category_tables', kwargs)
391 col_names = [(None, col_name) for col_name in super().get_foreign_columns()]
392 if not ignore_category_tables:
393 for table in self.category_tables.values():
394 col_names += [(table.name, col_name) for col_name in table.get_foreign_columns()]
395 return col_names
397 @docval(*get_docval(DynamicTable.get_linked_tables),
398 {'name': 'ignore_category_tables', 'type': bool,
399 'doc': "Ignore the category tables and only check in the main table columns", 'default': False},
400 allow_extra=False)
401 def get_linked_tables(self, **kwargs):
402 """
403 Get a list of the full list of all tables that are being linked to directly or indirectly
404 from this table via foreign DynamicTableColumns included in this table or in any table that
405 can be reached through DynamicTableRegion columns
408 Returns: List of dicts with the following keys:
409 * 'source_table' : The source table containing the DynamicTableRegion column
410 * 'source_column' : The relevant DynamicTableRegion column in the 'source_table'
411 * 'target_table' : The target DynamicTable; same as source_column.table.
413 """
414 ignore_category_tables = getargs('ignore_category_tables', kwargs)
415 other_tables = None if ignore_category_tables else list(self.category_tables.values())
416 return super().get_linked_tables(other_tables=other_tables)