Source code for wbia.dtool.depcache_table

# -*- coding: utf-8 -*-
"""
Module containing DependencyCacheTable

python -m dtool.depcache_control --exec-make_graph --show
python -m dtool.depcache_control --exec-make_graph --show --reduce

FIXME:
    RECTIFY: ismulti / ismodel need to be rectified. These indicate that this
        table receives multiple inputs from at least one parent table.

    RECTIFY: Need to standardize parent rowids -vs- parent args.
        In one-to-one cases they are the same. In multi cases the rowids
        indicate a uuid and the args are the saved set of rowids that exist
        in the manifest.

    RECTIFY: is rowid_list row-major or column-major?
        I think currently rowid_list is row-major and rowid_listT is column-major
        but this may not be consistent.



"""
from __future__ import absolute_import, division, print_function, unicode_literals
import utool as ut
import ubelt as ub
import six
import itertools as it
from wbia.dtool.sql_control import SQLDatabaseController
from six.moves import zip, range
from os.path import join, exists
from wbia.dtool import __SQLITE__ as lite  # NOQA
import networkx as nx
import re

(print, rrr, profile) = ut.inject2(__name__, '[depcache_table]')


EXTERN_SUFFIX = '_extern_uri'

CONFIG_TABLE = 'config'
CONFIG_ROWID = 'config_rowid'
CONFIG_HASHID = 'config_hashid'
CONFIG_TABLENAME = 'config_tablename'  # tablename associated with config
CONFIG_STRID = 'config_strid'
CONFIG_DICT = 'config_dict'


# if ut.is_developer():
#     GRACE_PERIOD = 10
# else:
GRACE_PERIOD = ut.get_argval('--grace', type_=int, default=0)

STORE_CFGDICT = True


class ExternType(ub.NiceRepr):
    """
    Type to denote an external resource not saved in an SQL table
    """

    def __init__(self, read_func, write_func, extern_ext=None, extkey=None):
        self.write_func = write_func
        self.read_func = read_func
        self.extern_ext = extern_ext
        self.extkey = extkey

    def __nice__(self):
        ext = None
        ext = self.extkey if self.extkey else ext
        ext = self.extern_ext if self.extern_ext and ext else ext
        return '(%s, %s, %s)' % (
            ut.get_funcname(self.read_func),
            ut.get_funcname(self.write_func),
            ext,
        )
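# A minimal usage sketch for ExternType, assuming numpy as the serializer (any
# read_func(fpath) / write_func(fpath, obj) pair works). The resulting object
# is passed as a column type when registering a property:
#
#     >>> import numpy as np
#     >>> np_type = ExternType(np.load, np.save, extern_ext='.npy')
#     >>> # e.g. used as one of the data_coltypes in a preproc registration
#     >>> # (hypothetical registration call; see DependencyCacheTable below)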
class ExternalStorageException(Exception):
    """ Indicates a missing external file """

    def __init__(self, *args, **kwargs):
        super(ExternalStorageException, self).__init__(*args, **kwargs)
def predrop_grace_period(tablename, seconds=None):
    """ Hack that gives the user some time to abort deleting everything """
    global GRACE_PERIOD
    warnmsg_fmt = ut.codeblock(
        """
        WARNING TABLE={tablename} IS MODIFIED

        About to reset (DROP) entire cache={tablename}.

        Generally this is OK and you shouldn't worry because depcache
        information should be recomputable.

        If you really don't want this to happen you have {seconds} seconds to
        kill this process before deletion occurs.
        """
    )
    if seconds is None:
        seconds = GRACE_PERIOD
        GRACE_PERIOD = max(0, GRACE_PERIOD // 2)
    warnmsg = warnmsg_fmt.format(tablename=tablename, seconds=seconds)
    # return ut.are_you_sure(warnmsg)
    return ut.grace_period(warnmsg, seconds)
def make_extern_io_funcs(table, cls):
    """ Hack in read/write defaults for pickleable classes """

    def _read_func(fpath, verbose=ut.VERBOSE):
        state_dict = ut.load_data(fpath, verbose=verbose)
        # FIXME: The constructor should not be called by default to conform to
        # pickle standards
        self = cls()
        self.__setstate__(state_dict)
        if hasattr(self, 'on_load'):
            self.on_load(table.depc)
        return self

    def _write_func(fpath, self, verbose=ut.VERBOSE):
        if hasattr(self, 'on_save'):
            self.on_save(table.depc, fpath)
        ut.save_data(fpath, self.__getstate__(), verbose=verbose, n=4)

    return _read_func, _write_func
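# A minimal sketch of a class compatible with make_extern_io_funcs: it must be
# constructible with no arguments and implement __getstate__/__setstate__
# (the on_load/on_save hooks are optional):
#
#     >>> class MyModel(object):
#     ...     def __init__(self):
#     ...         self.data = None
#     ...     def __getstate__(self):
#     ...         return {'data': self.data}
#     ...     def __setstate__(self, state_dict):
#     ...         self.data = state_dict['data']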
@profile
def ensure_config_table(db):
    """ SQL definition of configuration table. """
    config_addtable_kw = ut.odict(
        [
            ('tablename', CONFIG_TABLE),
            (
                'coldef_list',
                [
                    (CONFIG_ROWID, 'INTEGER PRIMARY KEY'),
                    (CONFIG_HASHID, 'TEXT'),
                    (CONFIG_TABLENAME, 'TEXT'),
                    (CONFIG_STRID, 'TEXT'),
                ]
                + ([(CONFIG_DICT, 'DICT')] if STORE_CFGDICT else []),
            ),
            ('docstr', 'table for algo configurations'),
            ('superkeys', [(CONFIG_HASHID,)]),
            ('dependson', []),
        ]
    )
    if not db.has_table(CONFIG_TABLE):
        db.add_table(**config_addtable_kw)
    else:
        current_state = db.get_table_autogen_dict(CONFIG_TABLE)
        new_state = config_addtable_kw
        if current_state['coldef_list'] != new_state['coldef_list']:
            if predrop_grace_period(CONFIG_TABLE):
                db.drop_all_tables()
                db.add_table(**new_state)
            else:
                raise NotImplementedError('Need to be able to modify tables')
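# The coldef_list above corresponds roughly to this SQL (a sketch, assuming
# STORE_CFGDICT is True; DICT is a custom column type handled by the SQL
# controller, not a builtin SQLite type):
#
#     CREATE TABLE config (
#         config_rowid     INTEGER PRIMARY KEY,
#         config_hashid    TEXT,
#         config_tablename TEXT,
#         config_strid     TEXT,
#         config_dict      DICT
#     )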
@ut.reloadable_class
class _TableConfigHelper(object):
    """ helper for configuration table """

    def get_parent_rowids(table, rowid_list):
        """
        Args:
            rowid_list (list): native table rowids

        Returns:
            parent_rowids (list of tuples): tuples of parent rowids

        Example:
            >>> # TODO: Need a test that creates a table
            >>> # with two multi-dependencies and two single dependencies.
            >>> # Then add two items to this table, and for each item
            >>> # find their parent inputs.
        """
        parent_rowids = table.get_internal_columns(
            rowid_list, table.parent_id_colnames, unpack_scalars=True, keepwrap=True
        )
        return parent_rowids

    def get_parent_rowargs(table, rowid_list):
        """
        Args:
            rowid_list (list): native table rowids

        Returns:
            parent_rowids (list of tuples): tuples of parent rowids

        Example:
            >>> # TODO: Need a test that creates a table
            >>> # with two multi-dependencies and two single dependencies.
            >>> # Then add two items to this table, and for each item
            >>> # find their parent inputs.
        """
        parent_rowids = table.get_parent_rowids(rowid_list)
        parent_ismulti = table.get_parent_col_attr('ismulti')
        if any(parent_ismulti):
            # If any of the parent columns are multi-indexes, then lookup the
            # mapping from the aggregated uuid to the expanded rowid set.
            parent_args = []
            model_uuids = table.get_model_uuid(rowid_list)
            for rowid, uuid, p_id_list in zip(rowid_list, model_uuids, parent_rowids):
                input_info = table.get_model_inputs(uuid)
                fixed_args = []
                for p_name, p_id, flag in zip(
                    table.parent_id_colnames, p_id_list, parent_ismulti
                ):
                    if flag:
                        new_p_id = input_info[p_name + '_model_input']
                        col_uuid = input_info[p_name + '_multi_id']
                        assert (
                            col_uuid == p_id
                        ), 'the model input has unexpectedly changed'
                        fixed_args.append(new_p_id)
                    else:
                        fixed_args.append(p_id)
                parent_args.append(fixed_args)
        else:
            parent_args = parent_rowids
        return parent_args

    def get_row_parent_rowid_map(table, rowid_list):
        """
        >>> from wbia.dtool.depcache_table import *  # NOQA
        parent_rowid_dict = depc['feat'].get_row_parent_rowid_map(rowid_list)
        key = list(parent_rowid_dict.keys())[0]
        val = list(parent_rowid_dict.values())[0]
        """
        parent_rowids = table.get_parent_rowids(rowid_list)
        parent_rowid_dict = dict(
            zip(table.parent_id_tablenames, ut.list_transpose(parent_rowids))
        )
        return parent_rowid_dict

    def get_config_history(table, rowid_list, assume_unique=True):
        """
        Returns the list of config objects for all properties in the
        dependency history of this object. Multi-edges are handled.

        Set assume_unique to False if there might be parents with different
        configs for the same table.
        >>> from wbia.dtool.depcache_table import *  # NOQA
        parent_rowid_dict = depc['feat'].get_row_parent_rowid_map(rowid_list)
        key = list(parent_rowid_dict.keys())[0]
        val = list(parent_rowid_dict.values())[0]
        """
        if assume_unique:
            rowid_list = rowid_list[0:1]
        tbl_cfgids = table.get_row_cfgid(rowid_list)
        cfgid2_rowids = ut.group_items(rowid_list, tbl_cfgids)
        unique_cfgids = cfgid2_rowids.keys()
        unique_cfgids = ut.filter_Nones(unique_cfgids)
        if len(unique_cfgids) == 0:
            return None
        unique_configs = table.get_config_from_rowid(unique_cfgids)
        # parent_rowids = table.get_parent_rowids(rowid_list)
        parent_rowargs = table.get_parent_rowargs(rowid_list)
        ret_list = [unique_configs]
        depc = table.depc
        rowargsT = ut.listT(parent_rowargs)
        parent_ismulti = table.get_parent_col_attr('ismulti')
        for tblname, ismulti, ids in zip(
            table.parent_id_tablenames, parent_ismulti, rowargsT
        ):
            if tblname == depc.root:
                continue
            if ismulti:
                ids = ids[0]
            parent_tbl = depc[tblname]
            ancestor_configs = parent_tbl.get_config_history(ids)
            if ancestor_configs is not None:
                ret_list.extend(ancestor_configs)
        return ret_list

    def __remove_old_configs(table):
        """
        table = ibs.depc['pairwise_match']
        """
        # developing
        # c = table.db.get_table_as_pandas('config')
        # t = table.db.get_table_as_pandas(table.tablename)
        # config_rowids = table.db.get_all_rowids(CONFIG_TABLE)
        # cfgdict_list = table.db.get(
        #     CONFIG_TABLE, colnames=(CONFIG_DICT,), id_iter=config_rowids,
        #     id_colname=CONFIG_ROWID)
        # bad_rowids = []
        # for rowid, cfgdict in zip(config_rowids, cfgdict_list):
        #     if cfgdict['version'] < 7:
        #         bad_rowids.append(rowid)
        command = ut.codeblock(
            """
            SELECT rowid, {} from {}
            """
        ).format(CONFIG_DICT, CONFIG_TABLE)
        table.db.cur.execute(command)
        bad_rowids = []
        for rowid, cfgdict in table.db.cur.fetchall():
            # MAKE GENERAL CONDITION
            if cfgdict['version'] < 7:
                bad_rowids.append(rowid)
        in_str = '(' + ', '.join(map(str, bad_rowids)) + ')'
        command = ut.codeblock(
            """
            SELECT rowid from {tablename}
            WHERE config_rowid IN {bad_rowids}
            """
        ).format(tablename=table.tablename, bad_rowids=in_str)
        # print(command)
        table.db.cur.execute(command)
        rowids = ut.flatten(table.db.cur.fetchall())
        table.delete_rows(rowids, dry=True, verbose=True, delete_extern=True)

    def get_ancestor_rowids(table, rowid_list, target_table):
        parent_rowids = table.get_parent_rowids(rowid_list)
        depc = table.depc
        for tblname, ids in zip(
            table.parent_id_tablenames, ut.list_transpose(parent_rowids)
        ):
            if tblname == target_table:
                return ids
            parent_tbl = depc[tblname]
            ancestor_ids = parent_tbl.get_ancestor_rowids(ids, target_table)
            if ancestor_ids is not None:
                return ancestor_ids
        return None  # Base case

    def get_row_cfgid(table, rowid_list):
        """
        >>> from wbia.dtool.depcache_table import *  # NOQA
        """
        config_rowids = table.get_internal_columns(rowid_list, (CONFIG_ROWID,))
        return config_rowids

    def get_row_configs(table, rowid_list):
        """
        Example:
            >>> # ENABLE_DOCTEST
            >>> # xdoctest: +REQUIRES(module:wbia)
            >>> from wbia.algo.hots.query_request import *  # NOQA
            >>> from wbia.dtool.example_depcache import *  # NOQA
            >>> depc = testdata_depc()
            >>> table = depc['chip']
            >>> rowid_list = depc.get_rowids('chip', [1, 2], config={})
            >>> configs = table.get_row_configs(rowid_list)
        """
        config_rowids = table.get_row_cfgid(rowid_list)
        # Only look up the configs that are needed
        unique_config_rowids, groupxs = ut.group_indices(config_rowids)
        unique_configs = table.get_config_from_rowid(unique_config_rowids)
        configs = ut.ungroup_unique(unique_configs, groupxs, maxval=len(rowid_list) - 1)
        return configs

    def get_row_cfghashid(table, rowid_list):
        config_rowids = table.get_row_cfgid(rowid_list)
        config_hashids = table.get_config_hashid(config_rowids)
        return config_hashids

    def get_row_cfgstr(table, rowid_list):
        config_rowids = table.get_row_cfgid(rowid_list)
        cfgstr_list = table.db.get(
            CONFIG_TABLE,
            colnames=(CONFIG_STRID,),
            id_iter=config_rowids,
            id_colname=CONFIG_ROWID,
        )
        return cfgstr_list

    def get_config_rowid(table, config=None, _debug=None):
        if isinstance(config, int):
            config_rowid = config
        else:
            config_rowid = table.add_config(config, _debug)
        return config_rowid

    def get_config_hashid(table, config_rowid_list):
        hashid_list = table.db.get(
            CONFIG_TABLE,
            colnames=(CONFIG_HASHID,),
            id_iter=config_rowid_list,
            id_colname=CONFIG_ROWID,
        )
        return hashid_list

    def get_config_rowid_from_hashid(table, config_hashid_list):
        config_rowid_list = table.db.get(
            CONFIG_TABLE,
            colnames=(CONFIG_ROWID,),
            id_iter=config_hashid_list,
            id_colname=CONFIG_HASHID,
        )
        return config_rowid_list

    def get_config_from_rowid(table, config_rowids):
        assert STORE_CFGDICT
        cfgdict_list = table.db.get(
            CONFIG_TABLE,
            colnames=(CONFIG_DICT,),
            id_iter=config_rowids,
            id_colname=CONFIG_ROWID,
        )
        return [
            None if dict_ is None else table.configclass(**dict_)
            for dict_ in cfgdict_list
        ]

    # @profile
    def add_config(table, config, _debug=None):
        try:
            # assume config is AlgoRequest or TableConfig
            config_strid = config.get_cfgstr()
        except AttributeError:
            config_strid = ut.to_json(config)
        config_hashid = ut.hashstr27(config_strid)
        if table.depc._debug or _debug:
            print('config_strid = %r' % (config_strid,))
            print('config_hashid = %r' % (config_hashid,))
        get_rowid_from_superkey = table.get_config_rowid_from_hashid
        if STORE_CFGDICT:
            colnames = (CONFIG_HASHID, CONFIG_TABLENAME, CONFIG_STRID, CONFIG_DICT)
            if hasattr(config, 'config'):
                # Hack for requests
                config = config.config
            cfgdict = config.__getstate__()
            param_list = [(config_hashid, table.tablename, config_strid, cfgdict)]
        else:
            colnames = (CONFIG_HASHID, CONFIG_TABLENAME, CONFIG_STRID)
            param_list = [(config_hashid, table.tablename, config_strid)]
        config_rowid_list = table.db.add_cleanly(
            CONFIG_TABLE, colnames, param_list, get_rowid_from_superkey
        )
        config_rowid = config_rowid_list[0]
        if table.depc._debug:
            print('config_rowid_list = %r' % (config_rowid_list,))
            # print('config_rowid = %r' % (config_rowid,))
        return config_rowid


@ut.reloadable_class
class _TableDebugHelper(object):
    """
    Contains printing and debug things
    """

    def print_sql_info(table):
        add_op = table.db._make_add_table_sqlstr(sep='\n ', **table._get_addtable_kw())
        ut.cprint(add_op, 'sql')

    def print_internal_info(table, all_attrs=False):
        """
        CommandLine:
            python -m dtool.depcache_table --exec-print_internal_info

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.dtool.depcache_table import *  # NOQA
            >>> from wbia.dtool.example_depcache2 import testdata_depc3
            >>> depc = testdata_depc3()
            >>> tablenames = ['labeler', 'vsone', 'neighbs', 'indexer']
            >>> for table in ut.take(depc, tablenames):  # .tables:
            >>>     table.print_internal_info()
        """
        print('----')
        print(table)
        # Print the other inferred attrs
        print('table.parent_col_attrs = %s' % (ut.repr3(table.parent_col_attrs, nl=2),))
        print('table.data_col_attrs = %s' % (ut.repr3(table.data_col_attrs, nl=2),))
        # Print the inferred allcol attrs
        ut.cprint(
            'table.internal_col_attrs = %s'
            % (ut.repr3(table.internal_col_attrs, nl=1, sorted_=False)),
            'python',
        )
        add_table_kw = table._get_addtable_kw()
        print('table.add_table_kw = %s' % (ut.repr2(add_table_kw, nl=2),))
        table.print_sql_info()
        if all_attrs:
            # Print all attributes
            for a in ut.get_instance_attrnames(
                table, with_properties=True, default=False
            ):
                print('  table.%s = %r' % (a, getattr(table, a)))

    def print_table(table):
        table.db.print_table_csv(table.tablename)
        # if table.ismulti:
        #     table.print_model_manifests()

    def print_info(table, with_colattrs=True, with_graphattrs=True):
        """ debug function """
        print('TABLE ATTRIBUTES')
        print('table.tablename = %r' % (table.tablename,))
        print('table.isinteractive = %r' % (table.isinteractive,))
        print('table.default_onthefly = %r' % (table.default_onthefly,))
        print('table.rm_extern_on_delete = %r' % (table.rm_extern_on_delete,))
        print('table.chunksize = %r' % (table.chunksize,))
        print('table.fname = %r' % (table.fname,))
        print('table.docstr = %r' % (table.docstr,))
        print('table.data_colnames = %r' % (table.data_colnames,))
        print('table.data_coltypes = %r' % (table.data_coltypes,))
        if with_graphattrs:
            print('TABLE GRAPH ATTRIBUTES')
            print('table.children = %r' % (table.children,))
            print('table.parent = %r' % (table.parent,))
            print('table.configclass = %r' % (table.configclass,))
            print('table.requestclass = %r' % (table.requestclass,))
        if with_colattrs:
            nl = 1
            print('TABLE COLUMN ATTRIBUTES')
            print('table.data_col_attrs = %s' % (ut.repr3(table.data_col_attrs, nl=nl),))
            print(
                'table.parent_col_attrs = %s'
                % (ut.repr3(table.parent_col_attrs, nl=nl),)
            )
            print(
                'table.internal_data_col_attrs = %s'
                % (ut.repr3(table.internal_data_col_attrs, nl=nl),)
            )
            print(
                'table.internal_parent_col_attrs = %s'
                % (ut.repr3(table.internal_parent_col_attrs, nl=nl),)
            )
            print(
                'table.internal_col_attrs = %s'
                % (ut.repr3(table.internal_col_attrs, nl=nl),)
            )

    def print_schemadef(table):
        print('\n'.join(table.db.get_table_autogen_str(table.tablename)))

    def print_configs(table):
        """
        CommandLine:
            python -m dtool.depcache_table --exec-print_configs

        Example:
            >>> # ENABLE_DOCTEST
            >>> from wbia.dtool.depcache_table import *  # NOQA
            >>> from wbia.dtool.example_depcache import testdata_depc
            >>> depc = testdata_depc()
            >>> table = depc['keypoint']
            >>> config = table.configclass()
            >>> rowids = table.get_rowids_from_root([1, 2], config=config)
            >>> config = table.configclass(adapt_shape=False)
            >>> rowids = table.get_rowids_from_root([1, 2], config=config)
            >>> table.print_configs()
            >>> table = depc['chip']
            >>> rowids = depc.get_rowids('spam', [1, 2])
            >>> table.print_configs()
        """
        text = table.db.get_table_csv(CONFIG_TABLE)
        print(text)

    def print_csv(table, truncate=True):
        print(table.db.get_table_csv(table.tablename, truncate=truncate))

    def print_model_manifests(table):
        print('manifests')
        rowids = table._get_all_rowids()
        uuids = table.get_model_uuid(rowids)
        for rowid, uuid in zip(rowids, uuids):
            print('rowid = %r' % (rowid,))
            print(ut.repr3(table.get_model_inputs(uuid), nl=1))

    def _assert_self(table):
        assert len(table.data_colnames) == len(
            table.data_coltypes
        ), 'specify same number of colnames and coltypes'
        if table.preproc_func is not None:
            # Check that preproc_func has a valid signature
            # ie (depc, parent_ids, config)
            argspec = ut.get_func_argspec(table.preproc_func)
            args = argspec.args
            if argspec.varargs and argspec.keywords:
                assert len(args) == 1, 'varargs and kwargs must have one arg for depcache'
            else:
                if len(args) < 3:
                    print('args = %r' % (args,))
                    msg = (
                        'preproc_func=%r for table=%s must have a '
                        'depcache arg, at least one parent rowid arg, '
                        'and a config arg'
                    ) % (table.preproc_func, table.tablename,)
                    raise AssertionError(msg)
                rowid_args = args[1:-1]
                if len(rowid_args) != len(table.parents()):
                    print('table.preproc_func = %r' % (table.preproc_func,))
                    print('args = %r' % (args,))
                    print('rowid_args = %r' % (rowid_args,))
                    msg = (
                        'preproc function for table=%s must have as many '
                        'rowid args (%d) as parents (%d)'
                    ) % (table.tablename, len(rowid_args), len(table.parents()))
                    raise AssertionError(msg)
        extern_class_colattrs = [
            colattr
            for colattr in table.data_col_attrs
            if colattr.get('is_external_class')
        ]
        for colattr in extern_class_colattrs:
            cls = colattr['coltype']
            # Check external column class funcs
            argspec = ut.get_func_argspec(cls.__init__)
            if argspec.defaults is not None:
                num_nondefault = len(argspec.args) - len(argspec.defaults)
            else:
                num_nondefault = len(argspec.args)
            if num_nondefault > 1:
                msg = ut.codeblock(
                    """
                    External args must be able to be constructed without any
                    args. IE: You need a default __init__(self) method
                    """
                )
                raise AssertionError(msg)


@ut.reloadable_class
class _TableInternalSetup(ub.NiceRepr):
    """ helper that sets up column information """

    @profile
    def _infer_datacol(table):
        """
        Constructs the columns needed to represent relationship to data

        Infers internal properties about this table given the colnames and
        datatypes

        CommandLine:
            python -m dtool.depcache_table --exec-_infer_datacol --show

        Example:
            >>> # ENABLE_DOCTEST
            >>> from wbia.dtool.depcache_table import *  # NOQA
            >>> from wbia.dtool.example_depcache import testdata_depc
            >>> depc = testdata_depc()
            >>> for table in depc.tables:
            >>>     print('----')
            >>>     table._infer_datacol()
            >>>     print(table)
            >>>     print('table.data_col_attrs = %s' %
            >>>           ut.repr3(table.data_col_attrs, nl=8))
            >>> table = depc['probchip']
            >>> table = depc['spam']
            >>> table = depc['vsone']
        """
        data_col_attrs = []
        # Parse column datatypes
        _iter = enumerate(zip(table.data_colnames, table.data_coltypes))
        for data_colx, (colname, coltype) in _iter:
            colattr = ut.odict()
            # Check column input subtypes
            is_tuple = isinstance(coltype, tuple)
            is_func = ut.is_func_or_method(coltype)
            is_externtup = is_tuple and coltype[0] == 'extern'
            is_functup = is_tuple and ut.is_func_or_method(coltype[0])
            is_exttype = isinstance(coltype, ExternType)
            # Check column input main types
            is_normal = coltype in lite.TYPE_TO_SQLTYPE
            # is_normal = not (is_tuple or is_func)
            isnested = is_tuple and not (is_func or is_externtup)
            is_external = is_func or is_functup or is_externtup or is_exttype
            # Switch on input types
            colattr['colname'] = colname
            colattr['coltype'] = coltype
            colattr['data_colx'] = data_colx
            if is_normal:
                # Normal non-nested column
                sqltype = lite.TYPE_TO_SQLTYPE[coltype]
                colattr['intern_colname'] = colname
                colattr['sqltype'] = sqltype
                colattr['is_normal'] = is_normal
            elif isnested:
                # Nested non-function normal columns
                colattr['isnested'] = isnested
                nestattrs = colattr['nestattrs'] = []
                for count, subtype in enumerate(coltype):
                    nestattr = ut.odict()
                    nestattrs.append(nestattr)
                    flat_colname = '%s_%d' % (colname, count)
                    sqltype = lite.TYPE_TO_SQLTYPE[subtype]
                    nestattr['flat_colname'] = flat_colname
                    nestattr['sqltype'] = sqltype
            elif is_external:
                # Nested external funcs
                write_func = None
                if is_exttype:
                    read_func = coltype.read_func
                    write_func = coltype.write_func
                    if coltype.extern_ext is not None:
                        colattr['extern_ext'] = coltype.extern_ext
                    if coltype.extkey is not None:
                        colattr['extkey'] = coltype.extkey
                elif is_externtup:
                    read_func = coltype[1]
                    if len(coltype) > 2:
                        write_func = coltype[2]
                    if len(coltype) > 3:
                        colattr['extern_ext'] = coltype[3]
                elif is_functup:
                    read_func = coltype[0]
                else:
                    read_func = coltype
                intern_colname = colname + EXTERN_SUFFIX
                sqltype = lite.TYPE_TO_SQLTYPE[str]
                colattr['is_external'] = True
                colattr['intern_colname'] = intern_colname
                colattr['write_func'] = write_func
                colattr['read_func'] = read_func
                colattr['sqltype'] = sqltype
            else:
                # External class column
                assert hasattr(coltype, '__getstate__') and hasattr(
                    coltype, '__setstate__'
                ), ('External classes must have __getstate__ and '
                    '__setstate__ methods')
                read_func, write_func = make_extern_io_funcs(table, coltype)
                sqltype = lite.TYPE_TO_SQLTYPE[str]
                intern_colname = colname + EXTERN_SUFFIX
                # raise AssertionError('external class columns')
                colattr['is_external'] = True
                colattr['is_external_class'] = True
                colattr['coltype'] = coltype
                colattr['intern_colname'] = intern_colname
                colattr['write_func'] = write_func
                colattr['read_func'] = read_func
                colattr['sqltype'] = sqltype
            data_col_attrs.append(colattr)
        return data_col_attrs

    @profile
    def _infer_parentcol(table):
        """
        construct columns to represent relationship to parent

        CommandLine:
            python -m dtool.depcache_table _infer_parentcol --show

        Returns:
            list: list of dictionaries for each parent

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.dtool.depcache_table import *  # NOQA
            >>> from wbia.dtool.example_depcache2 import testdata_depc3
            >>> depc = testdata_depc3()
            >>> table = depc['vsone']
            >>> table = depc['smk_match']
            >>> table = depc['neighbs']
            >>> table = depc['indexer']
            >>> parent_col_attrs = table._infer_parentcol()
            >>> result = ('parent_col_attrs = %s' % (ut.repr2(parent_col_attrs, nl=2),))
            >>> print(result)

        Ignore:
            >>> from wbia.dtool.depcache_table import *  # NOQA
            >>> from wbia.dtool.example_depcache2 import testdata_depc3
            >>> depc = testdata_depc3()
            >>> depc.d.get_indexer_data([1, 2, 3])
            >>> import uuid
            >>> depc.d.get_indexer_data([
            >>>     uuid.UUID('a01eda32-e4e0-b139-3274-e91d1b3e9ecf')])
        """
        parent_tablenames = table.parent_tablenames
        parent_col_attrs = []
        # Handle dependencies when parents are pairwise between tables
        parent_id_prefixs1 = []
        parent_id_prefixs2 = []
        seen_ = ut.ddict(lambda: 1)
        for parent_colx, col in enumerate(parent_tablenames):
            colattr = ut.odict()
            # Detect multicolumns
            if col.endswith('*'):
                ismulti = True
                parent_table = col[:-1]
            else:
                ismulti = False
                parent_table = col
            colattr['col'] = col
            colattr['ismulti'] = ismulti
            # Local input-id helps specify branch ordering
            colattr['local_input_id'] = ''
            if ismulti:
                colattr['local_input_id'] += '*'
            colattr['parent_table'] = parent_table
            colattr['parent_colx'] = parent_colx
            parent_id_prefixs1.append(parent_table)
            parent_col_attrs.append(colattr)
        colhist = ut.dict_hist(parent_id_prefixs1)
        for parent_colx, col in enumerate(parent_id_prefixs1):
            colattr = parent_col_attrs[parent_colx]
            if colhist[col] > 1:
                # Duplicate column names receive indices
                nwise_idx = seen_[col]
                nwise_total = colhist[col]
                prefix = col + str(nwise_idx)
                seen_[col] += 1
                colattr['isnwise'] = True
                colattr['nwise_total'] = nwise_total
                colattr['nwise_idx'] = nwise_idx
                colattr['local_input_id'] += six.text_type(nwise_idx)
            else:
                if not colattr['local_input_id']:
                    colattr['local_input_id'] = '1'
                prefix = col
                colattr['isnwise'] = False
            colattr['prefix'] = prefix
            parent_id_prefixs2.append(prefix)
        # Handle case when parents are a set of ids
        for colattr, prefix in zip(parent_col_attrs, parent_id_prefixs2):
            column_ismulti = colattr['ismulti']
            if column_ismulti:
                # Case when dependencies are many-to-one: hash of set items
                colname = prefix + '_setuuid'
                sqltype = 'UUID NOT NULL'
                INPUT_SIZE_SUFFIX = 'setsize'
                extra_cols = []
                extra_cols += [
                    {
                        'intern_colname': prefix + '_' + INPUT_SIZE_SUFFIX,
                        'sqltype': 'INTEGER NOT NULL',
                        # 'doc': 'size of an input set for this model',
                    }
                ]
                # File that maintains manifest of model inputs
                # INPUT_FPATH_SUFFIX = 'setfpath'
                # extra_cols += [{
                #     'intern_colname': prefix + '_' + INPUT_FPATH_SUFFIX,
                #     'sqltype': 'TEXT'
                # }]
                colattr['extra_cols'] = extra_cols
                # colattr['issuper'] = True
            else:
                # Normal case when dependencies are one-to-one
                colname = prefix + '_rowid'
                sqltype = 'INTEGER NOT NULL'
            colattr['intern_colname'] = colname
            colattr['sqltype'] = sqltype
        parent_col_attrs = [
            ut.order_dict_by(colattr, ['intern_colname', 'sqltype'])
            for colattr in parent_col_attrs
        ]
        return parent_col_attrs

    @profile
    def _infer_allcol(table):
        r"""
        Combine information from parentcol and datacol.
        Build column definitions that will directly define SQL columns.
        """
        internal_col_attrs = []
        # Append primary column
        colattr = ut.odict(
            [
                ('intern_colname', table.rowid_colname),
                ('sqltype', 'INTEGER PRIMARY KEY'),
                ('isprimary', True),
            ]
        )
        colattr['intern_colx'] = len(internal_col_attrs)
        internal_col_attrs.append(colattr)
        # Append parent columns
        ismulti = False
        for parent_colattr in table.parent_col_attrs:
            colattr = ut.odict()
            colattr['intern_colname'] = parent_colattr['intern_colname']
            colattr['parent_table'] = parent_colattr['parent_table']
            if parent_colattr['ismulti']:
                ismulti = True
            colattr['ismulti'] = parent_colattr['ismulti']
            colattr['isnwise'] = parent_colattr['isnwise']
            if colattr['isnwise']:
                colattr['nwise_total'] = parent_colattr['nwise_total']
                colattr['nwise_idx'] = parent_colattr['nwise_idx']
            colattr['sqltype'] = parent_colattr['sqltype']
            colattr['parent_colx'] = parent_colattr['parent_colx']
            colattr['intern_colx'] = len(internal_col_attrs)
            colattr['isparent'] = True
            colattr['issuper'] = True
            internal_col_attrs.append(colattr)
        # Append config columns
        colattr = ut.odict(
            [
                ('intern_colname', CONFIG_ROWID),
                ('sqltype', 'INTEGER DEFAULT 0'),
                ('issuper', True),
            ]
        )
        colattr['intern_colx'] = len(internal_col_attrs)
        internal_col_attrs.append(colattr)
        # Append quick access column
        # return any(table.get_parent_col_attr('ismulti'))
        # if table.ismulti:
        if ismulti:
            # Append model uuid column
            colattr = ut.odict()
            colattr['intern_colname'] = table.model_uuid_colname
            colattr['sqltype'] = 'UUID NOT NULL'
            colattr['intern_colx'] = len(internal_col_attrs)
            internal_col_attrs.append(colattr)
            # Append is-augmented column
            colattr = ut.odict()
            colattr['intern_colname'] = table.is_augmented_colname
            colattr['sqltype'] = 'INTEGER DEFAULT 0'
            colattr['intern_colx'] = len(internal_col_attrs)
            internal_col_attrs.append(colattr)
            if False:
                # TODO: eventually enable
                if table.taggable:
                    colattr = ut.odict()
                    colattr['intern_colname'] = 'model_tag'
                    colattr['sqltype'] = 'TEXT'
                    colattr['intern_colx'] = len(internal_col_attrs)
                    internal_col_attrs.append(colattr)
        else:
            # Append primary rowid column
            pass
        # Append data columns
        for data_colattr in table.data_col_attrs:
            colname = data_colattr['colname']
            if data_colattr.get('isnested', False):
                for nestcol in data_colattr['nestattrs']:
                    colattr = ut.odict()
                    colattr['intern_colname'] = nestcol['flat_colname']
                    colattr['sqltype'] = nestcol['sqltype']
                    colattr['intern_colx'] = len(internal_col_attrs)
                    colattr['data_colx'] = data_colattr['data_colx']
                    colattr['colname'] = colname
                    colattr['isdata'] = True
                    internal_col_attrs.append(colattr)
            else:
                colattr = ut.odict()
                colattr['intern_colname'] = data_colattr['intern_colname']
                colattr['sqltype'] = data_colattr['sqltype']
                colattr['intern_colx'] = len(internal_col_attrs)
                colattr['data_colx'] = data_colattr['data_colx']
                colattr['isdata'] = True
                colattr['colname'] = colname
                if data_colattr.get('is_external', False):
                    colattr['is_external_pointer'] = True
                    colattr['write_func'] = data_colattr['write_func']
                    colattr['read_func'] = data_colattr['read_func']
                internal_col_attrs.append(colattr)
        # Append extra columns
        for parent_colattr in table.parent_col_attrs:
            for extra_colattr in parent_colattr.get('extra_cols', []):
                colattr = ut.odict()
                colattr['intern_colname'] = extra_colattr['intern_colname']
                colattr['sqltype'] = extra_colattr['sqltype']
                colattr['intern_colx'] = len(internal_col_attrs)
                colattr['isextra'] = True
                internal_col_attrs.append(colattr)
        return internal_col_attrs


@ut.reloadable_class
class _TableGeneralHelper(ub.NiceRepr):
    """ helper """

    def __nice__(table):
        num_parents = len(table.parent_tablenames)
        num_cols = len(table.data_colnames)
        return '(%s) nP=%d%s nC=%d' % (
            table.tablename,
            num_parents,
            '*' if False and table.ismulti else '',
            num_cols,
        )

    # @property
    # def _table_colnames(table):
    #     return

    @property
    def extern_dpath(table):
        cache_dpath = table.depc.cache_dpath
        extern_dname = 'extern_' + table.tablename
        extern_dpath = join(cache_dpath, extern_dname)
        return extern_dpath

    @property
    def dpath(table):
        # assert table.ismulti, 'only valid for models'
        dname = table.tablename + '_storage'
        dpath = join(table.depc.cache_dpath, dname)
        # ut.ensuredir(dpath)
        return dpath

    # def dpath(table):
    #     from os.path import dirname
    #     dpath = dirname(table.db.fpath)
    #     return dpath

    @property
    @ut.memoize
    def ismulti(table):
        # TODO: or has multi parent
        return any(table.get_parent_col_attr('ismulti'))

    @property
    def configclass(table):
        return table.depc.configclass_dict[table.tablename]

    @property
    def requestclass(table):
        return table.depc.requestclass_dict.get(table.tablename, None)

    def new_request(table, qaids, daids, cfgdict=None):
        request = table.depc.new_request(table.tablename, qaids, daids, cfgdict=cfgdict)
        return request

    # --- Standard Properties

    @property
    def internal_data_col_attrs(table):
        flags = table.get_intern_col_attr('isdata')
        return ut.compress(table.internal_col_attrs, flags)

    @property
    def internal_parent_col_attrs(table):
        flags = table.get_intern_col_attr('isparent')
        return ut.compress(table.internal_col_attrs, flags)

    # --- / Standard Properties

    @ut.memoize
    def get_parent_col_attr(table, key):
        return ut.dict_take_column(table.parent_col_attrs, key)

    @ut.memoize
    def get_intern_data_col_attr(table, key):
        return ut.dict_take_column(table.internal_data_col_attrs, key)

    @ut.memoize
    def get_intern_parent_col_attr(table, key):
        return ut.dict_take_column(table.internal_parent_col_attrs, key)

    @ut.memoize
    def get_intern_col_attr(table, key):
        return ut.dict_take_column(table.internal_col_attrs, key)

    @ut.memoize
    def get_data_col_attr(table, key):
        return ut.dict_take_column(table.data_col_attrs, key)

    @property
    @ut.memoize
    def parent_id_tablenames(table):
        tablenames = tuple(
            [parent_colattr['parent_table'] for parent_colattr in table.parent_col_attrs]
        )
        return tablenames

    @property
    @ut.memoize
    def parent_id_prefix(table):
        prefixes = tuple(
            [parent_colattr['prefix'] for parent_colattr in table.parent_col_attrs]
        )
        return prefixes

    @property
    def extern_columns(table):
        colnames = table.get_data_col_attr('colname')
        flags = table.get_data_col_attr('is_extern')
        return ut.compress(colnames, flags)

    @property
    def rowid_colname(table):
        """ rowid of this table used by other dependent tables """
        return table.tablename + '_rowid'

    @property
    def superkey_colnames(table):
        return table.parent_id_colnames + (CONFIG_ROWID,)

    @property
    def model_uuid_colname(table):
        return 'model_uuid'

    @property
    def is_augmented_colname(table):
        return 'augment_bit'

    @property
    def parent_id_colnames(table):
        return tuple([colattr['intern_colname'] for colattr in table.parent_col_attrs])

    def get_rowids_from_root(table, root_rowids, config=None):
        return table.depc.get_rowids(table.tablename, root_rowids, config=config)

    @property
    @ut.memoize
    def parent(table):
        return ut.odict(
            [
                (parent_colattr['parent_table'], parent_colattr)
                for parent_colattr in table.parent_col_attrs
            ]
        )
        # return tuple([parent_colattr['parent_table']
        #               for parent_colattr in table.parent_col_attrs])

    @ut.memoize
    def parents(table, data=None):
        if data:
            return [
                (parent_colattr['parent_table'], parent_colattr)
                for parent_colattr in table.parent_col_attrs
            ]
        else:
            return [
                parent_colattr['parent_table']
                for parent_colattr in table.parent_col_attrs
            ]

    @property
    def children(table):
        graph = table.depc.explicit_graph
        children_tablenames = list(nx.neighbors(graph, table.tablename))
        return children_tablenames

    @property
    def ancestors(table):
        graph = table.depc.explicit_graph
        ancestor_tablenames = list(nx.ancestors(graph, table.tablename))
        return ancestor_tablenames

    def show_dep_subgraph(table, inter=None):
        from wbia.plottool.interactions import ExpandableInteraction

        autostart = inter is None
        if inter is None:
            inter = ExpandableInteraction(nCols=2)
        import wbia.plottool as pt

        graph = table.depc.explicit_graph
        nodes = ut.nx_all_nodes_between(graph, None, table.tablename)
        G = graph.subgraph(nodes)
        plot_kw = {'fontname': 'Ubuntu'}
        inter.append_plot(
            ut.partial(
                pt.show_nx,
                G,
                title='Dependency Subgraph (%s)' % (table.tablename),
                **plot_kw,
            )
        )
        if autostart:
            inter.start()

    def show_input_graph(table, inter=None):
        """
        CommandLine:
            python -m dtool.depcache_table show_input_graph --show

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.dtool.example_depcache2 import *  # NOQA
            >>> depc = testdata_depc3()
            >>> # xdoctest: +REQUIRES(--show)
            >>> ut.quit_if_noshow()
            >>> import wbia.plottool as pt
            >>> table = depc['smk_match']
            >>> table.show_input_graph()
            >>> #print(depc['smk_match'].flat_compute_rmi_edges)
            >>> ut.show_if_requested()
        """
        from wbia.plottool.interactions import ExpandableInteraction

        autostart = inter is None
        if inter is None:
            inter = ExpandableInteraction(nCols=2)
        table.show_dep_subgraph(inter)
        inputs = table.rootmost_inputs
        inter = inputs.show_exi_graph(inter)
        if autostart:
            inter.start()
        return inter

    @property
    @ut.memoize
    def expanded_input_graph(table):
        """
        CommandLine:
            python -m dtool.depcache_table --exec-expanded_input_graph --show --table=neighbs
            python -m dtool.depcache_table --exec-expanded_input_graph --show --table=vsone
            python -m dtool.depcache_table --exec-expanded_input_graph --show --table=smk_match

        TODO:
            * determine root argument structure
            * ???
            * compute dependencies in order

        Example:
            >>> # ENABLE_DOCTEST
            >>> from wbia.dtool.depcache_control import *  # NOQA
            >>> from wbia.dtool.example_depcache2 import *  # NOQA
            >>> depc = testdata_depc3()
            >>> tablename = ut.get_argval('--table', default='vsone')
            >>> table = depc[tablename]
            >>> # xdoctest: +REQUIRES(--show)
            >>> import wbia.plottool as pt
            >>> pt.ensureqt()
            >>> table.show_input_graph()
            >>> pt.interactions.zoom_factory()
            >>> ut.show_if_requested()
        """
        from wbia.dtool import input_helpers

        graph = table.depc.explicit_graph.copy()
        target = table.tablename
        exi_graph = input_helpers.make_expanded_input_graph(graph, target)
        return exi_graph

    @property
    def rootmost_inputs(table):
        """
        CommandLine:
            python -m dtool.depcache_table rootmost_inputs --show

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.dtool.depcache_control import *  # NOQA
            >>> from wbia.dtool.example_depcache2 import *  # NOQA
            >>> depc = testdata_depc3()
            >>> #tablename = 'multitest_score'
            >>> tablename = 'smk_match'
            >>> table = depc[tablename]
            >>> inputs = table.rootmost_inputs
            >>> result = ('inputs = %s' % (inputs,))
            >>> print('compute_order = %s' % (ut.repr2(inputs.flat_compute_rmi_edges(), nl=1)))
            >>> print(result)
            inputs = <TableInput [annot[t], vocab[t], inv_index[t]]>
        """
        from wbia.dtool import input_helpers

        exi_graph = table.expanded_input_graph
        rootmost_inputs = input_helpers.get_rootmost_inputs(exi_graph, table)
        return rootmost_inputs

    @ut.memoize
    def requestable_col_attrs(table):
        """
        Maps names of requestable columns to indices of internal columns
        """
        requestable_col_attrs = {}
        for colattr in table.internal_data_col_attrs:
            rattr = {}
            colname = colattr['intern_colname']
            rattr['intern_colx'] = colattr['intern_colx']
            rattr['intern_colname'] = colattr['intern_colname']
            requestable_col_attrs[colname] = rattr
        for colattr in table.data_col_attrs:
            rattr = {}
            if colattr.get('isnested'):
                nest_internal_names = ut.take_column(colattr['nestattrs'], 'flat_colname')
                nest_attrs = ut.dict_take(requestable_col_attrs, nest_internal_names)
                rattr['intern_colname'] = nest_internal_names
                rattr['intern_colx'] = ut.take_column(nest_attrs, 'intern_colx')
                rattr['isnested'] = True
            elif colattr.get('is_external'):
                intern_attr = requestable_col_attrs[colattr['intern_colname']]
                rattr['intern_colname'] = intern_attr['intern_colname']
                rattr['intern_colx'] = intern_attr['intern_colx']
                rattr['read_func'] = colattr['read_func']
                rattr['write_func'] = colattr['write_func']
                rattr['is_extern'] = True
            else:
                continue
            colname = colattr['colname']
            rattr['colname'] = colname
            requestable_col_attrs[colname] = rattr
        return requestable_col_attrs

    @ut.memoize
    def computable_colnames(table):
        # These are the colnames that we expect to be computed
        intern_colnames = ut.take_column(table.internal_col_attrs, 'intern_colname')
        insertable_flags = [
            not colattr.get('isprimary') for colattr in table.internal_col_attrs
        ]
        colnames = tuple(ut.compress(intern_colnames, insertable_flags))
        return colnames


@ut.reloadable_class
class _TableComputeHelper(object):
    """ helper for computing functions """

    # @profile
    def prepare_storage(
        table, dirty_parent_ids, proptup_gen, dirty_preproc_args, config_rowid, config
    ):
        """
        Converts output from ``preproc_func`` to data that can be stored in SQL

        CommandLine:
            python -m dtool.depcache_table prepare_storage

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.dtool.depcache_table import *  # NOQA
            >>> from wbia.dtool.example_depcache2 import *  # NOQA
            >>> depc = testdata_depc3(in_memory=False)
            >>> depc.clear_all()
            >>> tablename = 'labeler'
            >>> tablename = 'indexer'
            >>> config = {tablename + '_param': None, 'foo': 'bar'}
            >>> data = depc.get('labeler', [1, 2, 3], 'data', _debug=0)
            >>> data = depc.get('labeler', [1, 2, 3], 'data', config=config, _debug=0)
            >>> data = depc.get('indexer', [[1, 2, 3]], 'data', _debug=0)
            >>> data = depc.get('indexer', [[1, 2, 3]], 'data', config=config, _debug=0)
            >>> rowids = depc.get_rowids('indexer', [[1, 2, 3]], config=config, _debug=0)
            >>> table = depc[tablename]
            >>> model_uuid_list = table.get_internal_columns(rowids, ('model_uuid',))
            >>> model_uuid = model_uuid_list[0]
            >>> rowids2 = table.get_model_rowids(model_uuid_list)
            >>> assert rowids == rowids2, 'bad rowid computation'
            >>> table.print_table()
            >>> table.print_internal_info()
            >>> table.print_configs()
            >>> table.print_model_manifests()
            >>> #ut.vd(depc.cache_dpath)
        """
        if table.default_to_unpack:
            # Hack for tables explicitly specified with a single column
            proptup_gen = (None if data is None else (data,) for data in proptup_gen)
        # Flatten nested columns
        if any(table.get_data_col_attr('isnested')):
            proptup_gen = table._prepare_storage_nested(proptup_gen)
        # Write external columns
        if any(table.get_data_col_attr('write_func')):
            proptup_gen = table._prepare_storage_extern(
                dirty_parent_ids, config_rowid, config, proptup_gen
            )
        if table.ismulti:
            manifest_dpath = table.dpath
            ut.ensuredir(manifest_dpath)
        # Concatenate data with internal rowids / config-id
        for ids_, data_cols, args_ in zip(
            dirty_parent_ids, proptup_gen, dirty_preproc_args
        ):
            try:
                if data_cols is None:
                    yield None
                else:
                    multi_parent_flags = table.get_parent_col_attr('ismulti')
                    parent_colnames = table.get_parent_col_attr('intern_colname')
                    multi_id_names = ut.compress(parent_colnames, multi_parent_flags)
                    multi_ids = ut.compress(ids_, multi_parent_flags)
                    multi_args = ut.compress(args_, multi_parent_flags)
                    if table.ismulti:
                        multi_setsizes = []
                        manifest_data = {}
                        for multi_id, arg_, name in zip(
                            multi_ids, multi_args, multi_id_names
                        ):
                            assert table.ismulti, 'only valid for models'
                            # TODO: need to get back to root ids
                            manifest_data.update(
                                **{
                                    name + '_multi_id': multi_id,
                                    name + '_primary_ids': 'FIXME' + str(arg_),
                                    name + '_model_input': list(arg_),
                                }
                            )
                            multi_setsizes.append(len(arg_))
                        # Make a new model uuid
                        # TODO: maybe we should not do this here
                        model_uuid = ut.hashable_to_uuid((multi_ids, config.get_cfgstr()))
                        manifest_data['config'] = config
                        manifest_data['model_uuid'] = model_uuid
                        manifest_data['augmented'] = False
                        manifest_fpath = table.get_model_manifest_fpath(model_uuid)
                        ut.save_json(manifest_fpath, manifest_data, pretty=1)
                        # TODO: hash all input UUIDs and the full config together
                        quick_access_tup = (model_uuid, 0)
                        # Give the setsize and setfpath data if needed
                        parent_extra = tuple(ut.flatten(zip(multi_setsizes,)))
                    else:
                        quick_access_tup = tuple()
                        parent_extra = tuple()
                    # parent_extra = tuple(ut.flatten(zip(multi_setsizes, multi_setfpaths)))
                    # parent_extra = tuple(ut.flatten([(len(arg), fname) for arg,
                    #                                  fname in zip(multi_args,
                    #                                               multi_fpaths)]))
                    row_tup = (
                        ids_ + (config_rowid,) + quick_access_tup + data_cols + parent_extra
                    )
                    # print('row_tup = %r' % (row_tup,))
                    yield row_tup
            except Exception as ex:
                ut.printex(
                    ex, 'cat error', keys=['config_rowid', 'data_cols', 'parent_rowids']
                )
                raise

    def get_model_manifest_fname(table, model_uuid):
        manifest_fname = 'input_manifest_%s.json' % (model_uuid,)
        return manifest_fname

    def get_model_manifest_fpath(table, model_uuid):
        manifest_fname = table.get_model_manifest_fname(model_uuid)
        manifest_fpath = join(table.dpath, manifest_fname)
        return manifest_fpath
    def get_model_inputs(table, model_uuid):
        """
        Ignore:
            >>> table.get_model_uuid([2])
            [UUID('5b66772c-e654-dd9a-c9de-0ccc1bb6861c')]
        """
        assert table.ismulti, 'must be a model'
        manifest_fpath = table.get_model_manifest_fpath(model_uuid)
        manifest_data = ut.load_json(manifest_fpath)
        return manifest_data

    def get_model_uuid(table, rowids):
        """
        Ignore:
            >>> table.get_model_uuid([2])
            [UUID('5b66772c-e654-dd9a-c9de-0ccc1bb6861c')]
        """
        assert table.ismulti, 'must be a model'
        model_uuid_list = table.get_internal_columns(rowids, ('model_uuid',))
        return model_uuid_list

    def get_model_rowids(table, model_uuid_list):
        """
        Get the rowid of a model given its uuid

        Ignore:
            >>> import uuid
            >>> table.get_model_rowids([uuid.UUID('5b66772c-e654-dd9a-c9de-0ccc1bb6861c')])
            [2]
        """
        assert table.ismulti, 'must be a model'
        colnames = (table.rowid_colname,)
        andwhere_colnames = (table.model_uuid_colname,)
        params_iter = list(zip(model_uuid_list))
        rowid_list = table.db.get_where_eq(
            table.tablename,
            colnames,
            params_iter,
            andwhere_colnames,
            eager=True,
            nInput=len(model_uuid_list),
        )
        return rowid_list

    @profile
    def _prepare_storage_nested(table, proptup_gen):
        """
        Hack for when a sql schema has tuples defined in it.
        Accepts nested tuples and flattens them to fit into the sql tables
        """
        nCols = len(table.data_colnames)
        idxs1 = ut.where(table.get_data_col_attr('isnested'))
        idxs2 = ut.index_complement(idxs1, nCols)
        for data in proptup_gen:
            if data is None:
                yield None
                continue
            # Split data into nested and unnested columns
            unnested_cols = list(zip(ut.take(data, idxs2)))
            nested_cols = ut.take(data, idxs1)
            grouped_items = [nested_cols, unnested_cols]
            groupxs = [idxs1, idxs2]
            # Flatten nested columns
            unflat = ut.ungroup(grouped_items, groupxs, nCols - 1)
            # Recombine the data
            data_new = tuple(ut.flatten(unflat))
            yield data_new

    # @profile
    def _prepare_storage_extern(
        table, dirty_parent_ids, config_rowid, config, proptup_gen
    ):
        """
        Writes external data to disk if write function is specified.
""" internal_data_col_attrs = table.internal_data_col_attrs writable_flags = ut.dict_take_column(internal_data_col_attrs, 'write_func', False) extern_colattrs = ut.compress(internal_data_col_attrs, writable_flags) # extern_colnames = ut.dict_take_column(extern_colattrs, 'colname') extern_writers = ut.dict_take_column(extern_colattrs, 'write_func') nCols = len(internal_data_col_attrs) idxs1 = ut.where(writable_flags) idxs2 = ut.index_complement(idxs1, nCols) extern_fnames_list = list( zip( *[ table._get_extern_fnames( dirty_parent_ids, config_rowid, config, extern_colattr ) for extern_colattr in extern_colattrs ] ) ) # get extern cache directory and fpaths extern_dpath = table.extern_dpath ut.ensuredir(extern_dpath, verbose=False or table.depc._debug) # extern_fpaths_list = [ # [join(extern_dpath, fname) for fname in fnames] # for fnames in extern_fnames_list # ] for data, extern_fpaths in zip(proptup_gen, extern_fnames_list): if data is None: yield None continue normal_data = ut.take(data, idxs2) try: extern_data = ut.take(data, idxs1) except Exception as ex: ut.printex(ex, 'Did you forget to return/yeild your data as a tuple?') raise # Write external data to disk try: _iter = zip(extern_data, extern_fpaths, extern_writers) for obj, fpath, write_func in _iter: abs_fpath = join(extern_dpath, fpath) # print('WRITE fpath = %r, abs_fpath = %r' % (fpath, abs_fpath, )) write_func(abs_fpath, obj) ut.assert_exists(abs_fpath, verbose=False) except Exception as ex: ut.printex(ex, 'external write', keys=['config_rowid', 'data']) raise # Return path instead of data grouped_items = [extern_fpaths, normal_data] groupxs = [idxs1, idxs2] data_new = tuple(ut.ungroup(grouped_items, groupxs, nCols - 1)) yield data_new def get_extern_fnames(table, parent_rowids, config, extern_col_index=0): """ convinience function around get_extern_fnames Exmaple: >>> from wbia.dtool.depcache_table import * # NOQA >>> import wbia >>> ibs = wbia.opendb(defaultdb='testdb1') >>> depc = ibs.depc_annot >>> tablename = 'chips' >>> table = depc[tablename] >>> extern_col_index = 0 >>> info_props = ['image_uuid', 'verts', 'theta'] >>> config = depc.configclass_dict[tablename]() >>> root_rowids = [1, 2, 3] >>> rowid_list = depc.get_rowids(tablename, root_rowids) >>> parent_rowids = table.get_parent_rowids(rowid_list) >>> fname_list = table.get_extern_fnames(parent_rowids, config) >>> print('fname_list = %r' % (fname_list,)) """ config_rowid = table.get_config_rowid(config) # depc.get_rowids(tablename, root_rowids, config) internal_data_col_attrs = table.internal_data_col_attrs writable_flags = ut.dict_take_column(internal_data_col_attrs, 'write_func', False) extern_colattrs = ut.compress(internal_data_col_attrs, writable_flags) extern_colattr = extern_colattrs[extern_col_index] fname_list = table._get_extern_fnames( parent_rowids, config_rowid, config, extern_colattr ) # if False: # root_rowids = table.depc.get_root_rowids(table.tablename, rowid_list) # info_props = ['image_uuid', 'verts', 'theta'] # table.depc.make_root_info_uuid(root_rowids, info_props) return fname_list def _get_extern_fnames( table, parent_rowids, config_rowid, config, extern_colattr=None ): """ TODO: * Clean up signature * Make this function return the filenames used by a specific external column in this table. The inputs are the parent_rowids, (and the root rowids?), and the config. 
        Args:
            parent_rowids (list of tuples): list of tuples of rowids
        """
        config_hashid = table.get_config_hashid([config_rowid])[0]
        prefix = table.tablename
        prefix += '_' + extern_colattr['colname']
        colattrs = table.data_col_attrs[extern_colattr['data_colx']]
        # if colname is not None:
        #     prefix += '_' + colname
        # TODO: Put relevant root properties into the hash of the filename
        # (like bbox, parent image... basically the general vuuid and suuid).
        fmtstr = '{prefix}_id={rowids}_{config_hashid}{ext}'
        # HACK: check if the config specifies the extension type
        # extkey = table.extern_ext_config_keys.get(colname, 'ext')
        if 'extern_ext' in colattrs:
            ext = colattrs['extern_ext']
        else:
            extkey = colattrs.get('extkey', 'ext')
            ext = config[extkey] if extkey in config else '.cPkl'
        fname_list = [
            fmtstr.format(
                prefix=prefix,
                rowids='_'.join(list(map(str, rowids))),
                config_hashid=config_hashid,
                ext=ext,
            )
            for rowids in parent_rowids
        ]
        return fname_list

    def _compute_dirty_rows(
        table, dirty_parent_ids, dirty_preproc_args, config_rowid, config, verbose=True
    ):
        """
        dirty_preproc_args = preproc_args
        dirty_parent_ids = parent_rowids
        config_ = config
        """
        nInput = len(dirty_parent_ids)
        # if verbose:
        #     print('[deptbl.compute] nInput = %r' % (nInput,))
        # Pack arguments into column-wise order to send to the func
        argsT = zip(*dirty_preproc_args)
        argsT = list(argsT)  # TODO: remove
        # HACK extract config if given a request
        config_ = config.config if hasattr(config, 'config') else config
        # call registered worker function
        if table.vectorized:
            # Function is written in a way that only accepts multiple inputs at
            # once and generates output
            proptup_gen = table.preproc_func(table.depc, *argsT, config=config_)
        else:
            # Function is written in a way that only accepts a single row of
            # input at a time
            proptup_gen = (
                table.preproc_func(table.depc, *argrow, config=config_)
                for argrow in zip(*argsT)
            )
        DEBUG_LIST_MODE = True
        if DEBUG_LIST_MODE:
            proptup_gen = list(proptup_gen)
            num_output = len(proptup_gen)
            assert num_output == nInput, (
                'Input and output sizes do not agree. '
                'num_output=%r, num_input=%r' % (num_output, nInput,)
            )
        # Append rowids and rectify nested and external columns
        dirty_params_iter = table.prepare_storage(
            dirty_parent_ids, proptup_gen, dirty_preproc_args, config_rowid, config_
        )
        if DEBUG_LIST_MODE:
            dirty_params_iter = list(dirty_params_iter)
            assert len(dirty_params_iter) == nInput
        # None data means that there was an error for a specific row
        return dirty_params_iter

    def _chunk_compute_dirty_rows(
        table, dirty_parent_ids, dirty_preproc_args, config_rowid, config, verbose=True
    ):
        """
        Executes registered functions, does external storage, and yields
        results to be stored internally in SQL.
        CommandLine:
            python -m dtool.depcache_table _chunk_compute_dirty_rows

        Example:
            >>> # ENABLE_DOCTEST
            >>> from wbia.dtool.depcache_table import *  # NOQA
            >>> from wbia.dtool.example_depcache2 import *  # NOQA
            >>> depc = testdata_depc3(in_memory=False)
            >>> depc.clear_all()
            >>> data = depc.get('labeler', [1, 2, 3], 'data', _debug=True)
            >>> data = depc.get('indexer', [[1, 2, 3]], 'data', _debug=True)
            >>> depc.print_all_tables()
        """
        nInput = len(dirty_parent_ids)
        chunksize = nInput if table.chunksize is None else table.chunksize
        if verbose:
            print(
                '[deptbl.compute] nInput={}, chunksize={}, tbl={}'.format(
                    nInput, table.chunksize, table.tablename
                )
            )
        # Report computation progress
        dirty_iter = list(zip(dirty_parent_ids, dirty_preproc_args))
        prog_iter = ut.ProgChunks(
            dirty_iter,
            chunksize,
            nInput,
            lbl='[deptbl.compute] add %s chunk' % (table.tablename),
        )
        # These are the colnames that we expect to be computed
        colnames = table.computable_colnames()
        # def unfinished_features():
        #     if table._asobject:
        #         # Convenience
        #         argsT = [table.depc.get_obj(parent, rowids)
        #                  for parent, rowids in zip(table.parents(),
        #                                            dirty_parent_ids_chunk)]
        #     onthefly = None
        #     if table.default_onthefly or onthefly:
        #         assert not table.ismulti, ('cannot onthefly multi tables')
        #         proptup_gen = [tuple([None] * len(table.data_col_attrs))
        #                        for _ in range(len(dirty_parent_ids_chunk))]
        #     pass
        # CALL EXTERNAL PREPROCESSING / GENERATION FUNCTION
        try:
            # prog_iter = list(prog_iter)
            for dirty_chunk in prog_iter:
                nChunkInput = len(dirty_chunk)
                if nChunkInput == 0:
                    return
                dirty_parent_ids_chunk, dirty_preproc_args_chunk = zip(*dirty_chunk)
                dirty_params_iter = table._compute_dirty_rows(
                    dirty_parent_ids_chunk,
                    dirty_preproc_args_chunk,
                    config_rowid,
                    config,
                )
                DEBUG_LIST_MODE = True
                if DEBUG_LIST_MODE:
                    dirty_params_iter = list(dirty_params_iter)
                    assert len(dirty_params_iter) == nChunkInput
                # TODO: Separate into func which can be specified as a callback.
                # None data means that there was an error for a specific row
                dirty_params_iter = ut.filter_Nones(dirty_params_iter)
                nChunkInput = len(dirty_params_iter)
                yield colnames, dirty_params_iter, nChunkInput
        except Exception as ex:
            ut.printex(
                ex,
                'error in add_rowids',
                keys=[
                    'table',
                    'table.parents()',
                    'config',
                    'argsT',
                    'config_rowid',
                    'dirty_parent_ids',
                    'table.preproc_func',
                ],
                tb=True,
            )
            raise
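# The compute pipeline above, end to end (a summary of names defined in this
# module; db._add is the SQL controller's insertion method):
#
#     ensure_rows
#       └─ _chunk_compute_dirty_rows     # chunking + progress reporting
#            └─ _compute_dirty_rows      # calls the registered preproc_func
#                 └─ prepare_storage     # flattens nested cols, writes extern
#                    files, appends config_rowid; each yielded chunk is then
#                    passed to table.db._add for SQL insertion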
@ut.reloadable_class
class DependencyCacheTable(
    _TableGeneralHelper,
    _TableInternalSetup,
    _TableDebugHelper,
    _TableComputeHelper,
    _TableConfigHelper,
):
    r"""
    An individual node in the dependency graph.

    All SQL column information is stored in:
        internal_col_attrs - keeps track of internal info

    Additional metadata about specific columns is stored in:
        parent_col_attrs - keeps track of parent info
        data_col_attrs - keeps track of computed data

    Attributes:
        db (dtool.SQLDatabaseController): pointer to underlying database
        depc (dtool.DependencyCache): pointer to parent cache
        tablename (str): name of the table
        docstr (str): documentation for table
        parent_tablenames (str): parent tables in depcache
        data_colnames (List[str]): columns produced by preproc_func
        data_coltypes (List[str]): column SQL types produced by preproc_func
        preproc_func (func): worker function
        vectorized (bool): by default it is assumed registered functions can
            process multiple inputs at once.
        taggable (bool): specifies if a computed object can be disconnected
            from its ancestors and accessed via a tag.

    CommandLine:
        python -m dtool.depcache_table --exec-DependencyCacheTable

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.dtool.depcache_table import *  # NOQA
        >>> from wbia.dtool.example_depcache import testdata_depc
        >>> depc = testdata_depc()
        >>> print(depc['vsmany'])
        >>> print(depc['spam'])
        >>> print(depc['vsone'])
        >>> print(depc['nnindexer'])
    """

    @profile
    def __init__(
        table,
        depc=None,
        parent_tablenames=None,
        tablename=None,
        data_colnames=None,
        data_coltypes=None,
        preproc_func=None,
        docstr='no docstr',
        fname=None,
        asobject=False,
        chunksize=None,
        isinteractive=False,
        default_to_unpack=False,
        default_onthefly=False,
        rm_extern_on_delete=False,
        vectorized=True,
        taggable=False,
    ):
        """ receives kwargs from depc._register_prop """
        try:
            table.db = None
        except Exception:
            # HACK: jedi type hinting. Need to have non-obvious condition
            table.db = SQLDatabaseController()
        table.fpath_to_db = {}
        assert (
            re.search('[0-9]', tablename) is None
        ), 'tablename=%r cannot contain numbers' % (tablename,)
        # parent depcache
        table.depc = depc
        # Definitions
        table.tablename = tablename
        table.docstr = docstr
        table.parent_tablenames = parent_tablenames
        table.data_colnames = tuple(data_colnames)
        table.data_coltypes = data_coltypes
        table.preproc_func = preproc_func
        table.fname = fname
        # Behavior
        table.on_delete = None
        table.default_to_unpack = default_to_unpack
        table.vectorized = vectorized
        table.taggable = taggable
        # table.store_modification_time = True
        # Use the filesystem to accomplish this
        # table.store_access_time = True
        # table.store_create_time = True
        # table.store_delete_time = True
        table.chunksize = chunksize
        # Developmental properties
        table.subproperties = {}
        table.isinteractive = isinteractive
        table._asobject = asobject
        table.default_onthefly = default_onthefly
        # SQL Internals
        table.sqldb_fpath = None
        table.rm_extern_on_delete = rm_extern_on_delete
        # Update internals
        table.parent_col_attrs = table._infer_parentcol()
        table.data_col_attrs = table._infer_datacol()
        table.internal_col_attrs = table._infer_allcol()
        # Check for errors
        if ut.SUPER_STRICT:
            table._assert_self()
        table._hack_chunk_cache = None

    # @profile
    def initialize(table, _debug=None):
        """
        Ensures the SQL schema for this cache table
        """
        table.db = table.depc.fname_to_db[table.fname]
        # print('Checking sql for table=%r' % (table.tablename,))
        if not table.db.has_table(table.tablename):
            if _debug or ut.VERBOSE:
                print('Initializing table=%r' % (table.tablename,))
            new_state = table._get_addtable_kw()
            table.db.add_table(**new_state)
        else:
            # TODO: Check for table modifications
            new_state = table._get_addtable_kw()
            try:
                current_state = table.db.get_table_autogen_dict(table.tablename)
            except Exception as ex:
                strict = True
                ut.printex(
                    ex,
                    'TABLE %s IS CORRUPTED' % (table.tablename,),
                    iswarning=not strict,
                )
                if strict:
                    raise
                table.clear_table()
                current_state = table.db.get_table_autogen_dict(table.tablename)
            if current_state['coldef_list'] != new_state['coldef_list']:
                print('WARNING TABLE IS MODIFIED')
                if predrop_grace_period(table.tablename):
                    table.clear_table()
                else:
                    raise NotImplementedError('Need to be able to modify tables')
    def _get_addtable_kw(table):
        """
        Information that defines the SQL table

        CommandLine:
            python -m dtool.depcache_table _get_addtable_kw

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.dtool.depcache_table import *  # NOQA
            >>> from wbia.dtool.example_depcache2 import testdata_depc3
            >>> depc = testdata_depc3()
            >>> table1 = depc['indexer']
            >>> table2 = depc['neighbs']
            >>> add_table_kw1 = table1._get_addtable_kw()
            >>> add_table_kw2 = table2._get_addtable_kw()
            >>> result1 = ('%s.add_table_kw = %s' % (table1.tablename, ut.repr2(add_table_kw1, nl=2),))
            >>> result2 = ('%s.add_table_kw = %s' % (table2.tablename, ut.repr2(add_table_kw2, nl=2),))
            >>> print(result1)
            >>> print(result2)
        """
        coldef_list = [
            (colattr['intern_colname'], colattr['sqltype'])
            for colattr in table.internal_col_attrs
        ]
        superkeys = [table.superkey_colnames]
        add_table_kw = ut.odict(
            [
                ('tablename', table.tablename),
                ('coldef_list', coldef_list),
                ('docstr', table.docstr),
                ('superkeys', superkeys),
                ('dependson', table.parents()),
            ]
        )
        return add_table_kw

    # ----------------------
    # --- GETTERS NATIVE ---
    # ----------------------

    def _get_all_rowids(table):
        return table.db.get_all_rowids(table.tablename)

    @property
    def number_of_rows(table):
        return table.db.get_row_count(table.tablename)

    # @profile
[docs] def ensure_rows( table, parent_ids_, preproc_args, config=None, verbose=True, _debug=None ): """ Lazy addition Example: >>> # ENABLE_DOCTEST >>> from wbia.dtool.depcache_table import * # NOQA >>> from wbia.dtool.example_depcache2 import testdata_depc3 >>> depc = testdata_depc3() >>> table = depc['vsone'] >>> exec(ut.execstr_funckw(table.get_rowid), globals()) >>> config = table.configclass() >>> _debug = 5 >>> verbose = True >>> # test duplicate inputs are detected and accounted for >>> parent_rowids = [(i, i) for i in list(range(100))] * 100 >>> rectify_tup = table._rectify_ids(parent_rowids) >>> (parent_ids_, preproc_args, idxs1, idxs2) = rectify_tup >>> rowids = table.ensure_rows(parent_ids_, preproc_args, config=config, _debug=_debug) >>> result = ('rowids = %r' % (rowids,)) >>> print(result) """ _debug = table.depc._debug if _debug is None else _debug # Get requested configuration id config_rowid = table.get_config_rowid(config) # Check which rows are already computed initial_rowid_list = table._get_rowid(parent_ids_, config=config) initial_rowid_list = list(initial_rowid_list) if table.depc._debug: print( '[deptbl.ensure] initial_rowid_list = %s' % (ut.trunc_repr(initial_rowid_list),) ) print('[deptbl.ensure] config_rowid = %r' % (config_rowid,)) # Get corresponding "dirty" parent rowids isdirty_list = ut.flag_None_items(initial_rowid_list) num_dirty = sum(isdirty_list) num_total = len(parent_ids_) if num_dirty > 0: with ut.Indenter('[ADD]', enabled=_debug): if verbose or _debug: print( 'Add %d / %d new rows to %r' % (num_dirty, num_total, table.tablename,) ) print( '[deptbl.add] * config_rowid = {}, config={}'.format( config_rowid, str(config) ) ) dirty_parent_ids_ = ut.compress(parent_ids_, isdirty_list) dirty_preproc_args_ = ut.compress(preproc_args, isdirty_list) # Process only unique items unique_flags = ut.flag_unique_items(dirty_parent_ids_) dirty_parent_ids = ut.compress(dirty_parent_ids_, unique_flags) dirty_preproc_args = ut.compress(dirty_preproc_args_, unique_flags) # Break iterator into chunks if False and verbose: # check parent configs we are working with for x, parname in enumerate(table.parents()): if parname == table.depc.root: continue parent_table = table.depc[parname] ut.take_column(parent_ids_, x) rowid_list = ut.take_column(parent_ids_, x) try: parent_history = parent_table.get_config_history(rowid_list) print('parent_history = %r' % (parent_history,)) except KeyError: print( '[depcache_table] WARNING: config history is having troubles... says Jon' ) # Gives the function a hacky cache to use between chunks table._hack_chunk_cache = {} gen = table._chunk_compute_dirty_rows( dirty_parent_ids, dirty_preproc_args, config_rowid, config ) """ colnames, dirty_params_iter, nChunkInput = next(gen) """ for colnames, dirty_params_iter, nChunkInput in gen: table.db._add( table.tablename, colnames, dirty_params_iter, nInput=nChunkInput ) # Remove cache when main add is done table._hack_chunk_cache = None if verbose or _debug: print('[deptbl.add] finished add') # # The requested data is clean and must now exist in the parent # database, do a lookup to ensure the correct order. rowid_list = table._get_rowid(parent_ids_, config=config) else: rowid_list = initial_rowid_list if _debug: print('[deptbl.add] rowid_list = %s' % ut.trunc_repr(rowid_list)) return rowid_list
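    # ------------------------------------------------------------------
    # Hedged sketch (added, not original code): ensure_rows is idempotent;
    # a second call with the same parent ids and config finds no dirty rows
    # and returns the existing rowids. Based on the ensure_rows doctest,
    # assuming the demo depcache from wbia.dtool.example_depcache2.
    #
    #     >>> from wbia.dtool.example_depcache2 import testdata_depc3
    #     >>> depc = testdata_depc3()
    #     >>> table = depc['vsone']
    #     >>> config = table.configclass()
    #     >>> parent_rowids = [(1, 1), (1, 2)]
    #     >>> tup = table._rectify_ids(parent_rowids)
    #     >>> (parent_ids_, preproc_args, idxs1, idxs2) = tup
    #     >>> rowids1 = table.ensure_rows(parent_ids_, preproc_args, config=config)
    #     >>> rowids2 = table.ensure_rows(parent_ids_, preproc_args, config=config)
    #     >>> assert rowids1 == rowids2
    # ------------------------------------------------------------------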
    def _rectify_ids(table, parent_rowids):
        r"""
        Filters any rows containing None ids and transforms many-to-one sets
        of rowids into hashable UUIDs.

        Example:
            >>> from wbia.dtool.depcache_table import *  # NOQA
            >>> from wbia.dtool.example_depcache2 import *  # NOQA
            >>> depc = testdata_depc3()
            >>> depc.clear_all()
            >>> tablename = 'vocab'
            >>> tablename = 'indexer'
            >>> table = depc[tablename]
            >>> parent_rowids = [[1, 2, 3]]
            >>> rectify_tup = table._rectify_ids(parent_rowids)
            >>> (parent_ids_, preproc_args, idxs1, idxs2) = rectify_tup
            >>> result = ('parent_ids_ = %r' % (parent_ids_,)) + '\n'
            >>> result += ('preproc_args = %r' % (preproc_args,))
            >>> print(result)
            parent_ids_ = [(UUID('356a192b-7913-b04c-5457-4d18c28d46e6'),)]
            preproc_args = [[1, 2, 3]]

        Example1:
            >>> from wbia.dtool.depcache_table import *  # NOQA
            >>> from wbia.dtool.example_depcache2 import *  # NOQA
            >>> depc = testdata_depc3()
            >>> depc.clear_all()
            >>> tablename = 'vocab'
            >>> tablename = 'indexer'
            >>> table = depc[tablename]
            >>> parent_rowids = [[1, 2, 3]]
            >>> rowids = depc.get_rowids(tablename, parent_rowids)
            >>> model_uuid_list = table.get_internal_columns(rowids, ('model_uuid',))
            >>> model_uuid = model_uuid_list[0]
            >>> print('model_uuid = %r' % (model_uuid,))
            >>> rowids2 = table.get_model_rowids(model_uuid_list)
        """
        # Force the entire row to be None if any entries are None
        anyNone_flags = [x is None or any(ut.flag_None_items(x)) for x in parent_rowids]
        idxs2 = ut.where(anyNone_flags)
        idxs1 = ut.index_complement(idxs2, len_=len(parent_rowids))
        valid_parent_ids_ = ut.take(parent_rowids, idxs1)

        preproc_args = valid_parent_ids_
        if table.ismulti:
            # Convert any parent-id containing multiple values into a hash of uuids
            multi_parent_flags = table.get_parent_col_attr('ismulti')
            num_parents = len(multi_parent_flags)
            multi_parent_colxs = ut.where(multi_parent_flags)
            normal_colxs = ut.index_complement(multi_parent_colxs, num_parents)
            multi_parents = [
                ut.apply_grouping(ids_, multi_parent_colxs)
                for ids_ in valid_parent_ids_
            ]
            normal_parents = [
                ut.apply_grouping(ids_, normal_colxs) for ids_ in valid_parent_ids_
            ]
            # TODO: give each table a uuid getter function that derives from
            # get_root_uuids
            multicol_tables = ut.take(table.parents(), multi_parent_colxs)
            parent_uuid_getters = [
                table.depc.get_root_uuid if col == table.depc.root else ut.identity
                for col in multicol_tables
            ]

            parent_uuids_list = [
                [
                    uuid_getter(ids_)
                    for uuid_getter, ids_ in zip(parent_uuid_getters, ids_tup)
                ]
                for ids_tup in multi_parents
            ]
            multiset_uuid_list = [
                [ut.hashable_to_uuid(uuids) for uuids in parent_uuids_tup]
                for parent_uuids_tup in parent_uuids_list
            ]
            # preproc args are usually the same as parent ids. Model tables
            # are the exception.
            parent_ids_ = [
                tuple(
                    ut.ungroup(
                        [uuids, normalids],
                        [multi_parent_colxs, normal_colxs],
                        num_parents - 1,
                    )
                )
                for uuids, normalids in zip(multiset_uuid_list, normal_parents)
            ]
        else:
            parent_ids_ = valid_parent_ids_

        rectify_tup = parent_ids_, preproc_args, idxs1, idxs2
        return rectify_tup

    def _unrectify_ids(table, rowid_list_, parent_rowids, idxs1, idxs2):
        """
        Ensures that output is the same length as input.
        Inserts necessary Nones where the original input was also None.
        """
        # FIXME: turn into generator
        rowid_list = ut.ungroup([rowid_list_], [idxs1], len(parent_rowids) - 1)
        return rowid_list
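    # ------------------------------------------------------------------
    # Hedged sketch (added, not original code): _rectify_ids and
    # _unrectify_ids are inverses over the row indexing; rows filtered out
    # for containing None come back as None outputs. The fake lookup result
    # below is hypothetical, and the sketch assumes ut.ungroup fills
    # unspecified positions with None (as the _unrectify_ids docstring
    # implies).
    #
    #     >>> from wbia.dtool.example_depcache2 import testdata_depc3
    #     >>> depc = testdata_depc3()
    #     >>> table = depc['indexer']
    #     >>> parent_rowids = [[1, 2, 3], None, [1, None]]
    #     >>> tup = table._rectify_ids(parent_rowids)
    #     >>> (parent_ids_, preproc_args, idxs1, idxs2) = tup
    #     >>> # only row 0 survives; rows 1 and 2 contained Nones
    #     >>> assert idxs1 == [0] and idxs2 == [1, 2]
    #     >>> fake_rowids_ = [101]  # hypothetical lookup result for valid rows
    #     >>> rowids = table._unrectify_ids(fake_rowids_, parent_rowids, idxs1, idxs2)
    #     >>> assert rowids == [101, None, None]
    # ------------------------------------------------------------------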
[docs] def get_rowid( table, parent_rowids, config=None, ensure=True, eager=True, nInput=None, recompute=False, _debug=None, num_retries=1, ): r""" Returns the rowids of derived properties. If they do not exist it computes them. Args: parent_rowids (list): list of tuples with the parent rowids as the value of each tuple config (None): (default = None) ensure (bool): eager evaluation if True (default = True) eager (bool): (default = True) nInput (int): (default = None) recompute (bool): (default = False) _debug (None): (default = None) Returns: list: rowid_list CommandLine: python -m dtool.depcache_table --exec-get_rowid Example: >>> # ENABLE_DOCTEST >>> from wbia.dtool.depcache_table import * # NOQA >>> from wbia.dtool.example_depcache2 import testdata_depc3 >>> depc = testdata_depc3() >>> table = depc['labeler'] >>> exec(ut.execstr_funckw(table.get_rowid), globals()) >>> config = table.configclass() >>> _debug = True >>> parent_rowids = list(zip([1, None, None, 2])) >>> rowids = table.get_rowid(parent_rowids, config=config, _debug=_debug) >>> result = ('rowids = %r' % (rowids,)) >>> print(result) rowids = [1, None, None, 2] """ _debug = table.depc._debug if _debug is None else _debug if _debug: print( '[deptbl.get_rowid] Get %s rowids via %d parent superkeys' % (table.tablename, len(parent_rowids)) ) if _debug > 1: print('[deptbl.get_rowid] config = %r' % (config,)) print('[deptbl.get_rowid] ensure = %r' % (ensure,)) # Ensure inputs are in the correct format / remove Nones # Collapse multi-inputs into a UUID hash rectify_tup = table._rectify_ids(parent_rowids) (parent_ids_, preproc_args, idxs1, idxs2) = rectify_tup # Do the getting / adding work if recompute: print('REQUESTED RECOMPUTE') # get existing rowids, delete them, recompute the request rowid_list_ = table._get_rowid( parent_ids_, config=config, eager=True, nInput=None, _debug=_debug ) rowid_list_ = list(rowid_list_) needs_recompute_rowids = ut.filter_Nones(rowid_list_) try: table._recompute_and_store(needs_recompute_rowids) except Exception: # If the config changes, there is nothing we can do. # We have to delete the rows. table.delete_rows(rowid_list_) if ensure or recompute: # Compute properties if they do not exist for try_num in range(num_retries): try: rowid_list_ = table.ensure_rows( parent_ids_, preproc_args, config=config, _debug=_debug ) except ExternalStorageException: if try_num == num_retries - 1: raise else: rowid_list_ = table._get_rowid( parent_ids_, config=config, eager=eager, nInput=nInput, _debug=_debug ) # Map outputs to correspond with inputs rowid_list = table._unrectify_ids(rowid_list_, parent_rowids, idxs1, idxs2) return rowid_list
# @profile def _get_rowid(table, parent_ids_, config=None, eager=True, nInput=None, _debug=None): """ Returns rowids using parent superkeys. Does not add non-existing properties. """ colnames = (table.rowid_colname,) config_rowid = table.get_config_rowid(config=config) _debug = table.depc._debug if _debug is None else _debug if _debug: print('_get_rowid') print('_get_rowid table.tablename = %r ' % (table.tablename,)) print('_get_rowid parent_ids_ = %s' % (ut.trunc_repr(parent_ids_))) print('_get_rowid config = %s' % (config)) print('_get_rowid table.rowid_colname = %s' % (table.rowid_colname)) print('_get_rowid config_rowid = %s' % (config_rowid)) andwhere_colnames = table.superkey_colnames params_iter = (ids_ + (config_rowid,) for ids_ in parent_ids_) # TODO: make sure things that call this can accept a generator # Then remove this next line params_iter = list(params_iter) # print('**params_iter = %r' % (params_iter,)) rowid_list = table.db.get_where_eq( table.tablename, colnames, params_iter, andwhere_colnames, eager=eager, nInput=nInput, ) if _debug: print('_get_rowid rowid_list = %s' % (ut.trunc_repr(rowid_list))) return rowid_list
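    # ------------------------------------------------------------------
    # Hedged sketch (added, not original code): the superkey of a row is the
    # tuple of parent rowids plus the config rowid, so the same parents under
    # a different config resolve to a different (or missing) row. Assumes a
    # fresh demo cache where the alternate config was never computed.
    #
    #     >>> from wbia.dtool.example_depcache import testdata_depc
    #     >>> depc = testdata_depc()
    #     >>> table = depc['chip']
    #     >>> rowids = depc.get_rowids('chip', [1])
    #     >>> parent_ids_ = [(1,)]
    #     >>> assert table._get_rowid(parent_ids_) == rowids
    #     >>> other_config = table.configclass(version=-1)
    #     >>> assert table._get_rowid(parent_ids_, config=other_config) == [None]
    # ------------------------------------------------------------------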
[docs] def clear_table(table): """ Deletes all data in this table """ # TODO: need to clear one-to-one dependencies as well print('Clearing data in %r' % (table,)) table.db.drop_table(table.tablename) table.db.add_table(**table._get_addtable_kw())
# @profile
[docs]    def delete_rows(table, rowid_list, delete_extern=None, dry=False, verbose=None):
        """
        CommandLine:
            python -m dtool.depcache_table --exec-delete_rows

        Example:
            >>> # ENABLE_DOCTEST
            >>> from wbia.dtool.depcache_table import *  # NOQA
            >>> from wbia.dtool.example_depcache import testdata_depc
            >>> depc = testdata_depc()
            >>> #table = depc['keypoint']
            >>> table = depc['chip']
            >>> exec(ut.execstr_funckw(table.delete_rows), globals())
            >>> tablename = table.tablename
            >>> graph = depc.explicit_graph
            >>> config1 = None
            >>> config2 = table.configclass(version=-1)
            >>> config3 = table.configclass(version=-1, ext='.jpg')
            >>> config4 = table.configclass(ext='.jpg')
            >>> # Create several configs of rowid
            >>> aids = [1, 2, 3]
            >>> depc.get_rowids('spam', aids, config=config1)
            >>> depc.get_rowids('spam', aids, config=config2)
            >>> depc.get_rowids('spam', aids, config=config3)
            >>> depc.get_rowids('spam', aids, config=config4)
            >>> # Delete the png configs
            >>> rowid_list1 = depc.get_rowids(table.tablename, aids,
            >>>                               config=config2)
            >>> rowid_list2 = depc.get_rowids(table.tablename, aids,
            >>>                               config=config1)
            >>> rowid_list = rowid_list1 + rowid_list2
            >>> assert len(ut.setintersect_ordered(rowid_list1, rowid_list2)) == 0
            >>> table.delete_rows(rowid_list)
        """
        # import networkx as nx
        # from wbia.dtool.algo.preproc import preproc_feat
        if table.on_delete is not None and not dry:
            table.on_delete()
        if delete_extern is None:
            delete_extern = table.rm_extern_on_delete
        if verbose is None:
            verbose = False
        if ut.NOT_QUIET:
            if ut.VERBOSE:
                print(
                    'Requested delete of %d rows from %s'
                    % (len(rowid_list), table.tablename)
                )
                if dry:
                    print('Dry run')
            # print('delete_extern = %r' % (delete_extern,))
        depc = table.depc

        # TODO:
        # REMOVE EXTERNAL FILES
        internal_colnames = table.get_intern_data_col_attr('intern_colname')
        is_extern = table.get_intern_data_col_attr('is_external_pointer')
        extern_colnames = tuple(ut.compress(internal_colnames, is_extern))
        if len(extern_colnames) > 0:
            uris = table.get_internal_columns(
                rowid_list,
                extern_colnames,
                unpack_scalars=False,
                eager=True,
                keepwrap=False,
            )
            absuris = []
            for uri in it.chain.from_iterable(uris):
                if not isinstance(uri, tuple):
                    uri = [uri]
                for uri_ in uri:
                    absuris.append(join(table.extern_dpath, uri_))
            fpaths = [fpath for fpath in absuris if exists(fpath)]
            if delete_extern:
                if ut.VERBOSE or len(fpaths) > 0:
                    print('deleting {} existing external files'.format(len(fpaths)))
                if not dry:
                    ut.remove_fpaths(fpaths, verbose=verbose)
            else:
                if ut.VERBOSE or len(fpaths) > 0:
                    print('Leaving {} dangling filepaths'.format(len(fpaths)))

        # DELETE EXPLICITLY DEFINED CHILDREN
        # (TODO: handle implicit definitions)
        if True:

            def get_child_partial_rowids(child_table, rowid_list, parent_colnames):
                colnames = (child_table.rowid_colname,)
                andwhere_colnames = parent_colnames
                params_iter = ((rowid,) for rowid in rowid_list)
                params_iter = list(params_iter)
                child_db = depc[child_table.tablename].db
                child_unflat_rowids = child_db.get_where_eq(
                    child_table.tablename,
                    colnames,
                    params_iter,
                    andwhere_colnames,
                    unpack_scalars=False,
                    keepwrap=False,
                )
                child_rowids = ut.flatten(child_unflat_rowids)
                return child_rowids

            if ut.VERBOSE:
                if table.children:
                    print('Deleting from %d children' % (len(table.children),))
                else:
                    print('Table is a leaf node')

            for child in table.children:
                child_table = table.depc[child]
                if not child_table.ismulti:
                    # Hack, won't work for vsone / multisets
                    parent_colnames = (
                        child_table.parent[table.tablename]['intern_colname'],
                    )
                    child_rowids = get_child_partial_rowids(
                        child_table, rowid_list, parent_colnames
                    )
                    child_table.delete_rows(child_rowids, dry=dry)

        if ut.NOT_QUIET:
            non_none_rowids = ut.filter_Nones(rowid_list)
            if ut.VERBOSE or len(non_none_rowids) > 0:
                print(
                    'Deleting %d non-None rows from %s'
                    % (len(non_none_rowids), table.tablename)
                )
                print('...done!')

        # Finalize: Delete rows from this table
        if not dry:
            table.db.delete_rowids(table.tablename, rowid_list)
            num_deleted = len(ut.filter_Nones(rowid_list))
        else:
            num_deleted = 0
        return num_deleted
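    # ------------------------------------------------------------------
    # Hedged sketch (added, not original code): delete_rows cascades to
    # explicitly defined children and either removes or orphans external
    # files depending on delete_extern (default: table.rm_extern_on_delete).
    # A dry run reports what would happen without touching the database.
    #
    #     >>> from wbia.dtool.example_depcache import testdata_depc
    #     >>> depc = testdata_depc()
    #     >>> table = depc['chip']
    #     >>> rowids = depc.get_rowids('chip', [1, 2])
    #     >>> num = table.delete_rows(rowids, delete_extern=True, dry=True)
    #     >>> assert num == 0  # nothing is actually deleted in a dry run
    # ------------------------------------------------------------------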
    def _resolve_requested_columns(table, requested_colnames):
        ########
        # Map requested colnames flat to internal colnames
        ########
        # Get requested column information
        requestable_col_attrs = table.requestable_col_attrs()
        requested_colattrs = ut.take(requestable_col_attrs, requested_colnames)
        # Make column indices iterable for grouping
        intern_colxs = [
            xs if ut.isiterable(xs) else [xs]
            for xs in ut.take_column(requested_colattrs, 'intern_colx')
        ]
        nested_offsets_end = ut.cumsum(ut.lmap(len, intern_colxs))
        nested_offsets_start = [0] + nested_offsets_end[:-1]
        # Mark any columns with external information
        isextern_flags = ut.dict_take_column(requested_colattrs, 'is_extern', False)
        extern_colattrs = ut.compress(requested_colattrs, isextern_flags)
        extern_resolve_colxs = ut.compress(nested_offsets_start, isextern_flags)
        extern_read_funcs = ut.take_column(extern_colattrs, 'read_func')
        intern_colnames_ = ut.take_column(table.internal_col_attrs, 'intern_colname')
        intern_colnames = ut.unflat_take(intern_colnames_, intern_colxs)

        # TODO: this can be cleaned up
        nesting_xs = [
            x1 if x2 - x1 == 1 else list(range(x1, x2))
            for x1, x2 in zip(nested_offsets_start, nested_offsets_end)
        ]
        extern_resolve_tups = list(zip(extern_resolve_colxs, extern_read_funcs))
        flat_intern_colnames = tuple(ut.flatten(intern_colnames))
        return nesting_xs, extern_resolve_tups, flat_intern_colnames

    # @profile
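    # ------------------------------------------------------------------
    # Hedged sketch (added, not original code): for the demo 'chip' table the
    # externally stored 'chip' column resolves to an internal URI column plus
    # a read function, while native columns like 'size' map straight through.
    #
    #     >>> from wbia.dtool.example_depcache import testdata_depc
    #     >>> depc = testdata_depc()
    #     >>> table = depc['chip']
    #     >>> tup = table._resolve_requested_columns(('size', 'chip'))
    #     >>> nesting_xs, extern_resolve_tups, flat_intern_colnames = tup
    #     >>> # 'chip' is external, so exactly one (index, read_func) pair
    #     >>> assert len(extern_resolve_tups) == 1
    # ------------------------------------------------------------------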
[docs]    def get_row_data(
        table,
        tbl_rowids,
        colnames=None,
        _debug=None,
        read_extern=True,
        num_retries=1,
        eager=True,
        nInput=None,
        ensure=True,
        delete_on_fail=True,
        showprog=False,
        unpack_columns=None,
    ):
        r"""
        FIXME: unpacking is confusing with sql controller
        TODO: Clean up and allow for eager=False

        colnames = ('mask', 'size')

        CommandLine:
            python -m dtool.depcache_table --test-get_row_data:0
            python -m dtool.depcache_table --test-get_row_data:1

        Example:
            >>> # ENABLE_DOCTEST
            >>> from wbia.dtool.depcache_table import *  # NOQA
            >>> from wbia.dtool.example_depcache import testdata_depc
            >>> depc = testdata_depc()
            >>> table = depc['chip']
            >>> exec(ut.execstr_funckw(table.get_row_data), globals())
            >>> tbl_rowids = depc.get_rowids('chip', [1, 2, 3], _debug=True, recompute=True)
            >>> colnames = ('size_1', 'size', 'chip' + EXTERN_SUFFIX, 'chip')
            >>> kwargs = dict(read_extern=True, num_retries=1, _debug=True)
            >>> prop_list = table.get_row_data(tbl_rowids, colnames, **kwargs)
            >>> prop_list0 = ut.take_column(prop_list, [0, 1, 2])  # data subset
            >>> result = (ut.repr2(prop_list0, nl=1))
            >>> print(result)
            >>> #_debug, num_retries, read_extern = True, 1, True
            >>> prop_gen = table.get_row_data(tbl_rowids, colnames, eager=False)
            >>> prop_list2 = list(prop_gen)
            >>> assert len(prop_list2) == len(prop_list), 'inconsistent lens'
            >>> assert all([ut.lists_eq(prop_list2[x], prop_list[x]) for x in range(len(prop_list))]), 'inconsistent vals'
            >>> chips = table.get_row_data(tbl_rowids, 'chip', eager=False)
            [
                [2453, (1707, 2453), 'chip_chip_id=1_pyrappzicqoskdjq.png'],
                [250, (300, 250), 'chip_chip_id=2_pyrappzicqoskdjq.png'],
                [372, (545, 372), 'chip_chip_id=3_pyrappzicqoskdjq.png'],
            ]

        Example:
            >>> # ENABLE_DOCTEST
            >>> # Test external / ensure getters
            >>> from wbia.dtool.example_depcache import *  # NOQA
            >>> depc = testdata_depc()
            >>> table = depc['chip']
            >>> exec(ut.execstr_funckw(table.get_row_data), globals())
            >>> depc.clear_all()
            >>> config = {}
            >>> aids = [1,]
            >>> read_extern = False
            >>> tbl_rowids = depc.get_rowids('chip', aids, config=config)
            >>> data_fpaths = depc.get('chip', aids, 'chip', config=config, read_extern=False)
            >>> # Ensure data is recomputed if an external file is missing
            >>> ut.remove_fpaths(data_fpaths)
            >>> data = table.get_row_data(tbl_rowids, 'chip', read_extern=False, ensure=False)
            >>> data = table.get_row_data(tbl_rowids, 'chip', read_extern=False, ensure=True)
        """
        _debug = table.depc._debug if _debug is None else _debug
        if _debug:
            print(
                ('Get col of tablename=%r, colnames=%r with ' 'tbl_rowids=%s')
                % (table.tablename, colnames, ut.trunc_repr(tbl_rowids))
            )
        ####
        # Resolve requested column names
        if unpack_columns is None:
            unpack_columns = table.default_to_unpack
        if colnames is None:
            requested_colnames = table.data_colnames
        elif isinstance(colnames, six.string_types):
            # Unpack columns if only a single column is requested.
            requested_colnames = (colnames,)
            unpack_columns = True
        else:
            requested_colnames = colnames

        if _debug:
            print('requested_colnames = %r' % (requested_colnames,))
        tup = table._resolve_requested_columns(requested_colnames)
        nesting_xs, extern_resolve_tups, flat_intern_colnames = tup
        if _debug:
            print(
                '[deptbl.get_row_data] flat_intern_colnames = %r'
                % (flat_intern_colnames,)
            )

        nonNone_flags = ut.flag_not_None_items(tbl_rowids)
        nonNone_tbl_rowids = ut.compress(tbl_rowids, nonNone_flags)
        idxs1 = ut.where(nonNone_flags)
        idxs2 = ut.index_complement(idxs1, len(tbl_rowids))

        ####
        # Read data stored in SQL
        # FIXME: understand unpack_scalars and keepwrap
        # if table.default_onthefly:
        #     table._onthefly_dataget
        # else:
        if nInput is None and ut.is_listlike(nonNone_tbl_rowids):
            nInput = len(nonNone_tbl_rowids)
        generator_version = not eager

        raw_prop_list = table.get_internal_columns(
            nonNone_tbl_rowids,
            flat_intern_colnames,
            eager=eager,
            nInput=nInput,
            unpack_scalars=True,
            keepwrap=True,
            showprog=showprog,
        )

        def tup_unflat_take(items_list, unflat_index_list):
            r"""
            Hack for depcache that needs a tuple version of ut.unflat_take
            """

            def tuptake(list_, index_list):
                try:
                    return tuple([list_[index] for index in index_list])
                except TypeError:
                    return list_[index_list]

            return tuple(
                [
                    tup_unflat_take(items_list, xs)
                    if isinstance(xs, list)
                    else tuptake(items_list, xs)
                    for xs in unflat_index_list
                ]
            )

        # if len(raw_prop_list) > 0:
        if nInput > 0 and len(nonNone_tbl_rowids) > 0:
            if generator_version:

                def _generator_resolve_all():
                    extern_dpath = table.extern_dpath
                    for rawprop in raw_prop_list:
                        if rawprop is None:
                            raise Exception(
                                'raw prop was None, but it should always be a tuple. '
                                'This may indicate that the cache needs to be cleared'
                            )
                        exprop = list(rawprop)
                        # Modify prop with external data
                        for extern_colx, read_func in extern_resolve_tups:
                            uri = exprop[extern_colx]
                            uri_full = join(extern_dpath, uri)
                            if read_extern:
                                data = read_func(uri_full)
                            else:
                                data = uri_full
                                if ensure:
                                    ut.assertpath(uri_full)
                            exprop[extern_colx] = data
                        # nestprop = ut.unflat_take(exprop, nesting_xs)
                        nestprop = tup_unflat_take(exprop, nesting_xs)
                        yield nestprop

                prop_gen = _generator_resolve_all()
                if unpack_columns:
                    prop_gen = (None if p is None else p[0] for p in prop_gen)
                assert len(idxs2) == 0, 'non-eager mode not fully worked out yet'
                return prop_gen
            else:
                # print('raw_prop_list = %r' % (raw_prop_list,))
                if num_retries > 0:
                    raw_prop_list = list(raw_prop_list)  # TODO tee iterator instead?
                for try_num in range(num_retries + 1):
                    tries_left = num_retries - try_num
                    try:
                        prop_listT = table._resolve_any_external_data(
                            nonNone_tbl_rowids,
                            raw_prop_list,
                            extern_resolve_tups,
                            ensure,
                            read_extern,
                            delete_on_fail,
                            tries_left,
                            _debug,
                        )
                    except ExternalStorageException:
                        if tries_left == 0:
                            raise
                    else:
                        # Things worked, don't need to try again
                        break
                ####
                # Unflatten data into any given nested structure
                if len(prop_listT) > 0:
                    nested_proplistT = ut.unflat_take(prop_listT, nesting_xs)
                    for tx in ut.where([isinstance(xs, list) for xs in nesting_xs]):
                        nested_proplistT[tx] = list(zip(*nested_proplistT[tx]))
                    prop_list = list(zip(*nested_proplistT))
                else:
                    prop_list = []
                ####
                # Unpack single column data if requested
                if unpack_columns:
                    prop_list = [None if p is None else p[0] for p in prop_list]
        else:
            prop_list = []

        if len(idxs2) > 0:
            prop_list = ut.ungroup(
                [prop_list, [None] * len(idxs2)], [idxs1, idxs2], len(tbl_rowids) - 1
            )
        return prop_list
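    # ------------------------------------------------------------------
    # Hedged sketch (added, not original code): with read_extern=False the
    # getter returns resolved absolute file paths instead of loading the
    # data; ensure=True (the default) additionally asserts the paths exist.
    # Based on the second get_row_data doctest.
    #
    #     >>> from os.path import exists
    #     >>> from wbia.dtool.example_depcache import testdata_depc
    #     >>> depc = testdata_depc()
    #     >>> table = depc['chip']
    #     >>> tbl_rowids = depc.get_rowids('chip', [1])
    #     >>> fpaths = table.get_row_data(tbl_rowids, 'chip', read_extern=False)
    #     >>> assert all(exists(fpath) for fpath in fpaths)
    # ------------------------------------------------------------------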
    def _resolve_any_external_data(
        table,
        nonNone_tbl_rowids,
        raw_prop_list,
        extern_resolve_tups,
        ensure,
        read_extern,
        delete_on_fail,
        tries_left,
        _debug,
    ):
        ####
        # Read data specified by any external columns
        extern_dpath = table.extern_dpath
        try:
            prop_listT = list(zip(*raw_prop_list))
        except TypeError as ex:
            ut.printex(ex, 'error on prop_list shape', keys=['raw_prop_list'])
            raise

        for extern_colx, read_func in extern_resolve_tups:
            if _debug:
                print('[deptbl.get_row_data] read_func = %r' % (read_func,))
            data_list = []
            failed_list = []
            for uri in prop_listT[extern_colx]:
                uri_full = join(extern_dpath, uri)
                try:
                    if read_extern:
                        data = read_func(uri_full)
                    else:
                        if ensure:
                            ut.assertpath(uri_full)
                        data = uri_full
                except Exception as ex:
                    ut.printex(
                        ex,
                        'failed to load external data',
                        iswarning=(tries_left > 0),
                        keys=[
                            'tries_left',
                            'uri',
                            'uri_full',
                            (exists, 'uri_full'),
                            'read_func',
                        ],
                    )
                    if tries_left == 0:
                        raise
                    failed_list.append(True)
                    data = None
                else:
                    failed_list.append(False)
                data_list.append(data)
            if any(failed_list):
                # FIXME: should directly recompute the data in the rows
                # rather than deleting the rowids. Need the parent ids and
                # config to do that.
                failed_uris = ut.compress(prop_listT[extern_colx], failed_list)
                print('Failed to read %s' % (ut.trunc_repr(failed_uris, maxlen=300)))
                failed_rowids = ut.compress(nonNone_tbl_rowids, failed_list)
                if delete_on_fail:
                    table._recompute_external_storage(failed_rowids)
                    # table.delete_rows(failed_rowids, delete_extern=None)
                raise ExternalStorageException(
                    'Some cached filenames failed to read. '
                    'Need to recompute %d/%d rows'
                    % (sum(failed_list), len(failed_list))
                )
                # raise Exception('Non-existent data on disk. Need to recompute rows')
            prop_listT[extern_colx] = data_list
        return prop_listT

    def _recompute_external_storage(table, tbl_rowids):
        """
        Recomputes the external files stored for these rows.
        This DOES NOT modify the depcache internals.
        """
        assert STORE_CFGDICT
        print('Recomputing external data (_recompute_external_storage)')

        # TODO: need to rectify parent ids?
        parent_rowids = table.get_parent_rowids(tbl_rowids)
        parent_rowargs = table.get_parent_rowargs(tbl_rowids)

        # configs = table.get_row_configs(tbl_rowids)
        # assert ut.allsame(list(map(id, configs))), 'more than one config not yet supported'
        # TODO: groupby config
        config_rowids = table.get_row_cfgid(tbl_rowids)
        unique_cfgids, groupxs = ut.group_indices(config_rowids)

        for xs, cfgid in zip(groupxs, unique_cfgids):
            parent_ids = ut.take(parent_rowids, xs)
            parent_args = ut.take(parent_rowargs, xs)
            config = table.get_config_from_rowid([cfgid])[0]
            dirty_params_iter = table._compute_dirty_rows(
                parent_ids, parent_args, config_rowid=cfgid, config=config
            )
            # Evaluate just to ensure storage
            ut.evaluate_generator(dirty_params_iter)

    def _recompute_and_store(table, tbl_rowids, config=None):
        """
        Recomputes all data stored for these rows.
        This DOES modify the depcache internals.
        """
        assert STORE_CFGDICT
        print('Recomputing external data (_recompute_and_store)')
        if len(tbl_rowids) == 0:
            return
        parent_rowids = table.get_parent_rowids(tbl_rowids)
        parent_rowargs = table.get_parent_rowargs(tbl_rowids)
        # configs = table.get_row_configs(tbl_rowids)
        # assert ut.allsame(list(map(id, configs))), 'more than one config not yet supported'
        # TODO: groupby config
        if config is None:
            config_rowids = table.get_row_cfgid(tbl_rowids)
            unique_cfgids, groupxs = ut.group_indices(config_rowids)
        else:
            # This is incredibly hacky. Assume (untested) that every
            # requested row shares the explicitly given config, so all rows
            # form a single recompute group.
            unique_cfgids = [table.get_config_rowid(config)]
            groupxs = [list(range(len(tbl_rowids)))]

        colnames = table.computable_colnames()

        for xs, cfgid in zip(groupxs, unique_cfgids):
            parent_ids = ut.take(parent_rowids, xs)
            parent_args = ut.take(parent_rowargs, xs)
            rowids = ut.take(tbl_rowids, xs)
            config = table.get_config_from_rowid([cfgid])[0]
            dirty_params_iter = table._compute_dirty_rows(
                parent_ids, parent_args, config_rowid=cfgid, config=config
            )
            # Evaluate to external and internal storage
            table.db.set(table.tablename, colnames, dirty_params_iter, rowids)

    # _onthefly_dataget
    # togroup_args = [parent_rowids]
    # grouped_parent_ids = ut.apply_grouping(parent_rowids, groupxs)
    # unique_args_list = [unique_configs]

    # raw_prop_lists = []
    # # func = ut.partial(table.preproc_func, table.depc)
    # def groupmap_func(group_args, unique_args):
    #     config_ = unique_args[0]
    #     argsT = group_args
    #     propgen = table.preproc_func(table.depc, *argsT, config=config_)
    #     return list(propgen)

    # def grouped_map(groupmap_func, groupxs, togroup_args, unique_args_list):
    #     # TODO: generalize to utool
    #     grouped_args_list = [ut.apply_grouping(togroup, groupxs) for
    #                          togroup in togroup_args]
    #     group_ret_list = []
    #     for group_args, unique_args in zip(grouped_args_list,
    #                                        unique_args_list):
    #         group_ret = groupmap_func(group_args, unique_args)
    #         group_ret_list.append(group_ret)
    #     ret_list = ut.ungroup(group_ret_list, groupxs)
    #     return ret_list

    # raw_prop_list = grouped_map(groupmap_func, groupxs, togroup_args,
    #                             unique_args_list)

    # @profile
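    # ------------------------------------------------------------------
    # Hedged sketch (added, not original code): when an external file goes
    # missing, _resolve_any_external_data calls _recompute_external_storage
    # and raises ExternalStorageException; get_row_data then retries, so the
    # second read succeeds transparently with the default num_retries=1.
    #
    #     >>> import utool as ut
    #     >>> from wbia.dtool.example_depcache import testdata_depc
    #     >>> depc = testdata_depc()
    #     >>> table = depc['chip']
    #     >>> tbl_rowids = depc.get_rowids('chip', [1])
    #     >>> fpaths = table.get_row_data(tbl_rowids, 'chip', read_extern=False)
    #     >>> ut.remove_fpaths(fpaths)  # simulate storage loss
    #     >>> chips = table.get_row_data(tbl_rowids, 'chip')  # recomputed
    # ------------------------------------------------------------------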
[docs] def get_internal_columns( table, tbl_rowids, colnames=None, eager=True, nInput=None, unpack_scalars=True, keepwrap=False, showprog=False, ): """ Access data in this table using the table PRIMARY KEY rowids (not depc PRIMARY ids) """ prop_list = table.db.get( table.tablename, colnames, tbl_rowids, id_colname=table.rowid_colname, eager=eager, nInput=nInput, unpack_scalars=unpack_scalars, keepwrap=keepwrap, showprog=showprog, ) return prop_list
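    # ------------------------------------------------------------------
    # Hedged sketch (added, not original code): get_internal_columns bypasses
    # external-column resolution, so for externally stored data it returns
    # the raw relative URI string rather than the loaded object.
    #
    #     >>> from wbia.dtool.example_depcache import testdata_depc
    #     >>> depc = testdata_depc()
    #     >>> table = depc['chip']
    #     >>> tbl_rowids = depc.get_rowids('chip', [1])
    #     >>> uris = table.get_internal_columns(tbl_rowids, ('chip' + EXTERN_SUFFIX,))
    #     >>> assert isinstance(uris[0], six.string_types)  # a filename, not an image
    # ------------------------------------------------------------------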
[docs]    def export_rows(table, rowid, target):
        """
        The goal of this is to export taggable data that can be used
        independently of its dependent features.

        TODO List:
            * Gather information about columns
                * Native and (localized) external data
                - <table>_rowid - non-transferable
                - Parent UUIDS - non-transferable
                - config rowid - non-transferable
                - model_uuid -
                - augment_bit - transferable - trivial
                - words_extern_uri - copy to destination
                - feat_setsize - transferable - trivial
                - model_tag
            * Should also gather info from manifest:
                * feat_setuuid_primary_ids - non-transferable
                * feat_setuuid_model_input - non-transferable
            * Should gather exhaustive config history
            * Save to disk
            * Add function to reload data in exported format
            * Getters should be able to specify a tag in place of the root
              input for tagged data. Native root-ids should also be allowed.

        rowid = 1
        """
        raise NotImplementedError('unfinished')
        colnames = tuple(table.db.get_column_names(table.tablename))
        colvals = table.db.get(table.tablename, colnames, [rowid])[0]  # NOQA

        uuid = table.get_model_uuid([rowid])[0]
        manifest_data = table.get_model_inputs(uuid)  # NOQA
        config_history = table.get_config_history([rowid])  # NOQA

        table.parent_col_attrs = table._infer_parentcol()
        table.data_col_attrs
        table.internal_col_attrs

        table.db.cur.execute(
            'SELECT * FROM {tablename} WHERE rowid=?'.format(
                tablename=table.tablename
            ),
            (rowid,),
        )
        pass
if __name__ == '__main__': r""" CommandLine: python -m dtool.depcache_table python -m dtool.depcache_table --allexamples """ import multiprocessing multiprocessing.freeze_support() # for win32 import utool as ut # NOQA ut.doctest_funcs()