# -*- coding: utf-8 -*-
"""
This module needs serious refactoring and testing
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import shelve
import six
import uuid
import json
import codecs
import os

# import lru
# git+https://github.com/amitdev/lru-dict
# import atexit
# import inspect
import contextlib
import collections
from six.moves import cPickle as pickle  # NOQA
from six.moves import range, zip
from os.path import join, normpath, basename, exists
from functools import partial
from itertools import chain
import zipfile
from utool import util_arg
from utool import util_hash
from utool import util_inject
from utool import util_path
from utool import util_io
from utool import util_str
from utool import util_cplat
from utool import util_inspect
from utool import util_list
from utool import util_class
from utool import util_type

# from utool import util_decor
from utool import util_dict
from utool._internal import meta_util_constants

print, rrr, profile = util_inject.inject2(__name__)


# TODO: Remove globalness

VERBOSE = util_arg.VERBOSE
QUIET = util_arg.QUIET
VERBOSE_CACHE = util_arg.NOT_QUIET
USE_CACHE = not util_arg.get_argflag('--nocache')
__APPNAME__ = meta_util_constants.default_appname  # the global application name


class CacheMissException(Exception):
    pass


# class YACacher(object):
#     @six.add_metaclass(util_class.ReloadingMetaclass)
@util_class.reloadable_class
class ShelfCacher(object):
    """yet another cacher"""

    def __init__(self, fpath, enabled=True):
        self.verbose = True
        if self.verbose:
            print('[shelfcache] initializing()')
        self.fpath = fpath
        self.shelf = None if not enabled else shelve.open(fpath)

    def __del__(self):
        self.close()

    def __getitem__(self, cachekey):
        return self.load(cachekey)

    def __setitem__(self, cachekey, data):
        return self.save(cachekey, data)

    def keys(self):
        return self.shelf.keys()

    def load(self, cachekey):
        if self.verbose:
            print('[shelfcache] loading %s' % (cachekey,))
        cachekey = cachekey.encode('ascii')
        if self.shelf is None or cachekey not in self.shelf:
            raise CacheMissException(
                'Cache miss cachekey=%r self.fpath=%r' % (cachekey, self.fpath)
            )
        else:
            return self.shelf[cachekey]

    def save(self, cachekey, data):
        if self.verbose:
            print('[shelfcache] saving %s' % (cachekey,))
        cachekey = cachekey.encode('ascii')
        if self.shelf is not None:
            self.shelf[cachekey] = data
            self.shelf.sync()

    def clear(self):
        if self.verbose:
            print('[shelfcache] clearing cache')
        self.shelf.clear()
        self.shelf.sync()

    def close(self):
        if self.verbose:
            print('[shelfcache] closing()')
        if self.shelf is not None:
            self.shelf.close()


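# Illustrative ShelfCacher usage (a minimal sketch; `expensive_result` is a
# hypothetical value and 'test.shelf' an arbitrary path):
#     cacher = ShelfCacher('test.shelf')
#     cacher['mykey'] = expensive_result  # written to the shelf and synced
#     data = cacher['mykey']              # raises CacheMissException on a miss
#     cacher.close()

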
def get_default_appname():
    global __APPNAME__
    return __APPNAME__


def text_dict_read(fpath):
    try:
        with open(fpath, 'r') as file_:
            dict_text = file_.read()
    except IOError:
        dict_text = '{}'
    try:
        dict_ = eval(dict_text, {}, {})
    except SyntaxError as ex:
        import utool as ut
        print(dict_text)
        ut.printex(ex, 'Bad Syntax', keys=['dict_text'])
        dict_ = {}
        if util_arg.SUPER_STRICT:
            raise
    return dict_


def text_dict_write(fpath, dict_):
    """
    Very naive, but readable way of storing a dictionary on disk

    FIXME: This broke on RoseMary's big dataset. Not sure why. It gave bad
    syntax, and the SyntaxError did not seem to be caught.
    """
    # dict_ = text_dict_read(fpath)
    # dict_[key] = val
    dict_text2 = util_str.repr4(dict_, strvals=False)
    if VERBOSE:
        print('[cache] ' + str(dict_text2))
    util_io.write_to(fpath, dict_text2)


def consensed_cfgstr(prefix, cfgstr, max_len=128, cfgstr_hashlen=16):
    if len(prefix) + len(cfgstr) > max_len:
        hashed_cfgstr = util_hash.hashstr27(cfgstr, hashlen=cfgstr_hashlen)
        # Hack for prettier names
        if not prefix.endswith('_'):
            fname_cfgstr = prefix + '_' + hashed_cfgstr
        else:
            fname_cfgstr = prefix + hashed_cfgstr
    else:
        fname_cfgstr = prefix + cfgstr
    return fname_cfgstr


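# For example, a short prefix+cfgstr passes through unchanged, while a long one
# is condensed to the prefix plus a fixed-length hash (a sketch; the hash value
# is illustrative, not a real output):
#     consensed_cfgstr('normalizer_', 'small')    # -> 'normalizer_small'
#     consensed_cfgstr('normalizer_', 'x' * 200)  # -> 'normalizer_' + 16-char hash

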
def _args2_fpath(dpath, fname, cfgstr, ext):
    r"""
    Ensures that the filename is not too long

    Internal util_cache helper function

    Windows MAX_PATH=260 characters
    Absolute length is limited to 32,000 characters
    Each filename component is limited to 255 characters

    Args:
        dpath (str):
        fname (str):
        cfgstr (str):
        ext (str):

    Returns:
        str: fpath

    CommandLine:
        python -m utool.util_cache --test-_args2_fpath

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_cache import *  # NOQA
        >>> from utool.util_cache import _args2_fpath
        >>> import utool as ut
        >>> dpath = 'F:\\data\\work\\PZ_MTEST\\_ibsdb\\_wbia_cache'
        >>> fname = 'normalizer_'
        >>> cfgstr = u'PZ_MTEST_DSUUIDS((9)67j%dr%&bl%4oh4+)_QSUUIDS((9)67j%dr%&bl%4oh4+)zebra_plains_vsone_NN(single,K1+1,last,cks1024)_FILT(ratio<0.625;1.0,fg;1.0)_SV(0.01;2;1.57minIn=4,nRR=50,nsum,)_AGG(nsum)_FLANN(4_kdtrees)_FEATWEIGHT(ON,uselabel,rf)_FEAT(hesaff+sift_)_CHIP(sz450)'
        >>> ext = '.cPkl'
        >>> fpath = _args2_fpath(dpath, fname, cfgstr, ext)
        >>> result = str(ut.ensure_unixslash(fpath))
        >>> target = 'F:/data/work/PZ_MTEST/_ibsdb/_wbia_cache/normalizer_xfylfboirymmcpfg.cPkl'
        >>> ut.assert_eq(result, target)
    """
    if len(ext) > 0 and ext[0] != '.':
        raise ValueError('Please be explicit and use a dot in ext')
    max_len = 128
    # should hashlen be larger?
    cfgstr_hashlen = 16
    prefix = fname
    fname_cfgstr = consensed_cfgstr(
        prefix, cfgstr, max_len=max_len, cfgstr_hashlen=cfgstr_hashlen
    )
    fpath = join(dpath, fname_cfgstr + ext)
    fpath = normpath(fpath)
    return fpath


def save_cache(dpath, fname, cfgstr, data, ext='.cPkl', verbose=None):
    """
    Saves data using util_io, but smartly constructs a filename
    """
    fpath = _args2_fpath(dpath, fname, cfgstr, ext)
    util_io.save_data(fpath, data, verbose=verbose)
    return fpath


def load_cache(dpath, fname, cfgstr, ext='.cPkl', verbose=None, enabled=True):
    """
    Loads data using util_io, but smartly constructs a filename
    """
    if verbose is None:
        verbose = VERBOSE_CACHE
    if not USE_CACHE or not enabled:
        if verbose > 1:
            print(
                '[util_cache] ... cache disabled: dpath=%s cfgstr=%r'
                % (basename(dpath), cfgstr)
            )
        raise IOError(3, 'Cache Loading Is Disabled')
    fpath = _args2_fpath(dpath, fname, cfgstr, ext)
    if not exists(fpath):
        if verbose > 0:
            print(
                '[util_cache] ... cache does not exist: dpath=%r fname=%r cfgstr=%r'
                % (basename(dpath), fname, cfgstr)
            )
        raise IOError(2, 'No such file or directory: %r' % (fpath,))
    else:
        if verbose > 2:
            print(
                '[util_cache] ... cache exists: dpath=%r fname=%r cfgstr=%r'
                % (basename(dpath), fname, cfgstr)
            )
        import utool as ut
        nbytes = ut.get_file_nBytes(fpath)
        big_verbose = (nbytes > 1e6 and verbose > 2) or verbose > 2
        if big_verbose:
            print('[util_cache] About to read file of size %s' % (ut.byte_str2(nbytes),))
    try:
        with ut.Timer(fpath, verbose=big_verbose and verbose > 3):
            data = util_io.load_data(fpath, verbose=verbose > 2)
    except (EOFError, IOError, ImportError) as ex:
        print('CORRUPTED? fpath = %s' % (fpath,))
        if verbose > 1:
            print(
                '[util_cache] ... cache miss dpath=%s cfgstr=%r'
                % (basename(dpath), cfgstr)
            )
        raise IOError(str(ex))
    except Exception:
        print('CORRUPTED? fpath = %s' % (fpath,))
        raise
    else:
        if verbose > 2:
            print('[util_cache] ... cache hit')
        return data


def tryload_cache(dpath, fname, cfgstr, verbose=None):
    """
    returns None if cache cannot be loaded
    """
    try:
        return load_cache(dpath, fname, cfgstr, verbose=verbose)
    except IOError:
        return None


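# Round-trip sketch for the save_cache / tryload_cache pair (assumes `dpath`
# is an existing directory, caching is enabled, and `data` is picklable):
#     fpath = save_cache(dpath, 'expensive', 'params_v1', data)
#     data2 = tryload_cache(dpath, 'expensive', 'params_v1')  # None on a miss

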
@profile
def tryload_cache_list(dpath, fname, cfgstr_list, verbose=False):
    """
    Loads a list of similar cached datas.
    Returns flags indicating which entries still need to be computed.
    """
    data_list = [tryload_cache(dpath, fname, cfgstr, verbose) for cfgstr in cfgstr_list]
    ismiss_list = [data is None for data in data_list]
    return data_list, ismiss_list


@profile
def tryload_cache_list_with_compute(
    use_cache, dpath, fname, cfgstr_list, compute_fn, *args
):
    """
    Tries to load data, but computes it if it can't.
    A compute function must be given.
    """
    # Load precomputed values
    if use_cache is False:
        data_list = [None] * len(cfgstr_list)
        ismiss_list = [True] * len(cfgstr_list)
        # Don't load or save, just compute
        data_list = compute_fn(ismiss_list, *args)
        return data_list
    else:
        data_list, ismiss_list = tryload_cache_list(
            dpath, fname, cfgstr_list, verbose=False
        )
    num_total = len(cfgstr_list)
    if any(ismiss_list):
        # Compute missing values
        newdata_list = compute_fn(ismiss_list, *args)
        newcfgstr_list = util_list.compress(cfgstr_list, ismiss_list)
        index_list = util_list.list_where(ismiss_list)
        print(
            '[cache] %d/%d cache hits for %s in %s'
            % (num_total - len(index_list), num_total, fname, util_path.tail(dpath))
        )
        # Cache write
        for newcfgstr, newdata in zip(newcfgstr_list, newdata_list):
            save_cache(dpath, fname, newcfgstr, newdata, verbose=False)
        # Populate missing result
        for index, newdata in zip(index_list, newdata_list):
            data_list[index] = newdata
    else:
        print(
            '[cache] %d/%d cache hits for %s in %s'
            % (num_total, num_total, fname, util_path.tail(dpath))
        )
    return data_list


class Cacher(object):
    """
    old non-inheritable version of Cachable
    """

    def __init__(
        self,
        fname,
        cfgstr=None,
        cache_dir='default',
        appname='utool',
        ext='.cPkl',
        verbose=None,
        enabled=True,
    ):
        if verbose is None:
            verbose = VERBOSE
        if cache_dir == 'default':
            cache_dir = util_cplat.get_app_resource_dir(appname)
        util_path.ensuredir(cache_dir)
        self.dpath = cache_dir
        self.fname = fname
        self.cfgstr = cfgstr
        self.verbose = verbose
        self.ext = ext
        self.enabled = enabled

    def get_fpath(self):
        fpath = _args2_fpath(self.dpath, self.fname, self.cfgstr, self.ext)
        return fpath

    def existing_versions(self):
        """
        Returns data with different cfgstr values that were previously computed
        with this cacher.
        """
        import glob
        pattern = self.fname + '_*' + self.ext
        for fname in glob.glob1(self.dpath, pattern):
            fpath = join(self.dpath, fname)
            yield fpath

    def exists(self, cfgstr=None):
        return exists(self.get_fpath())

    def load(self, cfgstr=None):
        cfgstr = self.cfgstr if cfgstr is None else cfgstr
        # assert cfgstr is not None, 'must specify cfgstr in constructor or call'
        if cfgstr is None:
            import warnings
            warnings.warn('No cfgstr given in Cacher constructor or call')
            cfgstr = ''
        assert self.fname is not None, 'no fname'
        assert self.dpath is not None, 'no dpath'
        # TODO: use the computed fpath from this object instead
        data = load_cache(
            self.dpath,
            self.fname,
            cfgstr,
            self.ext,
            verbose=self.verbose,
            enabled=self.enabled,
        )
        if self.verbose > 1:
            print('[cache] ... ' + self.fname + ' Cacher hit')
        return data

    def tryload(self, cfgstr=None):
        """
        Like load, but returns None if the load fails
        """
        if cfgstr is None:
            cfgstr = self.cfgstr
        if cfgstr is None:
            import warnings
            warnings.warn('No cfgstr given in Cacher constructor or call')
            cfgstr = ''
        # assert cfgstr is not None, (
        #     'must specify cfgstr in constructor or call')
        if not self.enabled:
            if self.verbose > 0:
                print('[cache] ... %s Cacher disabled' % (self.fname))
            return None
        try:
            if self.verbose > 1:
                print('[cache] tryload fname=%s' % (self.fname,))
            # if self.verbose > 2:
            #     print('[cache] cfgstr=%r' % (cfgstr,))
            return self.load(cfgstr)
        except IOError:
            if self.verbose > 0:
                print('[cache] ... %s Cacher miss' % (self.fname))

    def ensure(self, func, *args, **kwargs):
        data = self.tryload()
        if data is None:
            data = func(*args, **kwargs)
            self.save(data)
        return data

    def save(self, data, cfgstr=None):
        if not self.enabled:
            return
        cfgstr = self.cfgstr if cfgstr is None else cfgstr
        # assert cfgstr is not None, 'must specify cfgstr in constructor or call'
        if cfgstr is None:
            import warnings
            warnings.warn('No cfgstr given in Cacher constructor or call')
            cfgstr = ''
        assert self.fname is not None, 'no fname'
        assert self.dpath is not None, 'no dpath'
        if self.verbose > 0:
            print('[cache] ... ' + self.fname + ' Cacher save')
        save_cache(self.dpath, self.fname, cfgstr, data, self.ext)


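# Typical Cacher pattern (a sketch; `compute_data` is a hypothetical function):
#     cacher = Cacher('expensive_result', cfgstr='params_v1', appname='utool')
#     data = cacher.tryload()
#     if data is None:
#         data = compute_data()
#         cacher.save(data)
# or, equivalently, let the cacher orchestrate the computation:
#     data = cacher.ensure(compute_data)

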
# @util_decor.memoize
def make_utool_json_encoder(allow_pickle=False):
    """
    References:
        http://stackoverflow.com/questions/8230315/python-sets-are
        http://stackoverflow.com/questions/11561932/why-does-json
        https://github.com/jsonpickle/jsonpickle
        http://stackoverflow.com/questions/24369666/typeerror-b1
        http://stackoverflow.com/questions/30469575/how-to-pickle
    """
    import utool as ut
    PYOBJECT_TAG = '__PYTHON_OBJECT__'
    UUID_TAG = '__UUID__'
    SLICE_TAG = '__SLICE__'

    def decode_pickle(text):
        obj = pickle.loads(codecs.decode(text.encode(), 'base64'))
        return obj

    def encode_pickle(obj):
        try:
            # Use protocol 2 to support both python2.7 and python3
            COMPATIBLE_PROTOCOL = 2
            pickle_bytes = pickle.dumps(obj, protocol=COMPATIBLE_PROTOCOL)
        except Exception:
            raise
        text = codecs.encode(pickle_bytes, 'base64').decode()
        return text

    type_to_tag = collections.OrderedDict(
        [(slice, SLICE_TAG), (uuid.UUID, UUID_TAG), (object, PYOBJECT_TAG)]
    )
    tag_to_type = {tag: type_ for type_, tag in type_to_tag.items()}

    def slice_part(c):
        return '' if c is None else str(c)

    def encode_slice(s):
        parts = [slice_part(s.start), slice_part(s.stop), slice_part(s.step)]
        return ':'.join(parts)

    def decode_slice(x):
        return ut.smart_cast(x, slice)

    encoders = {
        UUID_TAG: str,
        SLICE_TAG: encode_slice,
        PYOBJECT_TAG: encode_pickle,
    }
    decoders = {
        UUID_TAG: uuid.UUID,
        SLICE_TAG: decode_slice,
        PYOBJECT_TAG: decode_pickle,
    }
    if not allow_pickle:
        del encoders[PYOBJECT_TAG]
        del decoders[PYOBJECT_TAG]
        type_ = tag_to_type[PYOBJECT_TAG]
        del tag_to_type[PYOBJECT_TAG]
        del type_to_tag[type_]

    class UtoolJSONEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, util_type.NUMPY_TYPE_TUPLE):
                return obj.tolist()
            elif six.PY3 and isinstance(obj, bytes):
                return obj.decode('utf-8')
            elif isinstance(obj, (set, frozenset)):
                return list(obj)
                # return json.JSONEncoder.default(self, list(obj))
                # return [json.JSONEncoder.default(o) for o in obj]
            elif isinstance(obj, util_type.PRIMATIVE_TYPES):
                return json.JSONEncoder.default(self, obj)
            elif hasattr(obj, '__getstate__') and not isinstance(obj, uuid.UUID):
                return obj.__getstate__()
            else:
                for type_, tag in type_to_tag.items():
                    if isinstance(obj, type_):
                        # print('----')
                        # print('encoder obj = %r' % (obj,))
                        # print('encoder type_ = %r' % (type_,))
                        func = encoders[tag]
                        text = func(obj)
                        return {tag: text}
                raise TypeError('Invalid serialization type=%r' % (type(obj)))

        @classmethod
        def _json_object_hook(cls, value, verbose=False, **kwargs):
            if len(value) == 1:
                tag, text = list(value.items())[0]
                if tag in decoders:
                    # print('----')
                    # print('decoder tag = %r' % (tag,))
                    func = decoders[tag]
                    obj = func(text)
                    # print('decoder obj = %r' % (obj,))
                    return obj
                else:
                    return value
            return value

    return UtoolJSONEncoder


def to_json(val, allow_pickle=False, pretty=False):
    r"""
    Converts a python object to a JSON string using the utool convention

    Args:
        val (object):

    Returns:
        str: json_str

    References:
        http://stackoverflow.com/questions/11561932/why-does-json-dumpslistnp

    CommandLine:
        python -m utool.util_cache --test-to_json
        python3 -m utool.util_cache --test-to_json

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_cache import *  # NOQA
        >>> import utool as ut
        >>> import numpy as np
        >>> import uuid
        >>> val = [
        >>>     '{"foo": "not a dict"}',
        >>>     1.3,
        >>>     [1],
        >>>     # {1: 1, 2: 2, 3: 3}, cant use integer keys
        >>>     {1, 2, 3},
        >>>     slice(1, None, 1),
        >>>     b'an ascii string',
        >>>     np.array([1, 2, 3]),
        >>>     ut.get_zero_uuid(),
        >>>     ut.LazyDict(x='fo'),
        >>>     ut.LazyDict,
        >>>     {'x': {'a', 'b', 'cde'}, 'y': [1]}
        >>> ]
        >>> #val = ut.LazyDict(x='fo')
        >>> allow_pickle = True
        >>> if not allow_pickle:
        >>>     val = val[:-2]
        >>> json_str = ut.to_json(val, allow_pickle=allow_pickle)
        >>> result = ut.repr3(json_str)
        >>> reload_val = ut.from_json(json_str, allow_pickle=allow_pickle)
        >>> # Make sure pickle doesnt happen by default
        >>> try:
        >>>     json_str = ut.to_json(val)
        >>>     assert False or not allow_pickle, 'expected a type error'
        >>> except TypeError:
        >>>     print('Correctly got type error')
        >>> try:
        >>>     json_str = ut.from_json(val)
        >>>     assert False, 'expected a type error'
        >>> except TypeError:
        >>>     print('Correctly got type error')
        >>> print(result)
        >>> print('original = ' + ut.repr3(val, nl=1))
        >>> print('reconstructed = ' + ut.repr3(reload_val, nl=1))
        >>> assert reload_val[6] == val[6].tolist()
        >>> assert reload_val[6] is not val[6]

    Example:
        >>> # test 3.7 safe uuid
        >>> import uuid
        >>> import utool as ut
        >>> ut.to_json([uuid.uuid4()])
    """
    UtoolJSONEncoder = make_utool_json_encoder(allow_pickle)
    json_kw = {}
    json_kw['cls'] = UtoolJSONEncoder
    if pretty:
        json_kw['indent'] = 4
        json_kw['separators'] = (',', ': ')
    json_str = json.dumps(val, **json_kw)
    return json_str


def from_json(json_str, allow_pickle=False):
    """
    Decodes a JSON object specified in the utool convention

    Args:
        json_str (str):
        allow_pickle (bool): (default = False)

    Returns:
        object: val

    CommandLine:
        python -m utool.util_cache from_json --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_cache import *  # NOQA
        >>> import utool as ut
        >>> json_str = 'just a normal string'
        >>> json_str = '["just a normal string"]'
        >>> allow_pickle = False
        >>> val = from_json(json_str, allow_pickle)
        >>> result = ('val = %s' % (ut.repr2(val),))
        >>> print(result)
    """
    if six.PY3:
        if isinstance(json_str, bytes):
            json_str = json_str.decode('utf-8')
    UtoolJSONEncoder = make_utool_json_encoder(allow_pickle)
    object_hook = UtoolJSONEncoder._json_object_hook
    val = json.loads(json_str, object_hook=object_hook)
    return val


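# The tag convention in practice (a sketch; the exact strings assume the
# py2/py3 era this module targets, where slice objects lack __getstate__):
#     to_json(uuid.UUID(int=0))   # -> '{"__UUID__": "00000000-0000-0000-0000-000000000000"}'
#     to_json(slice(1, None, 2))  # -> '{"__SLICE__": "1::2"}'

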
def get_func_result_cachekey(func_, args_=tuple(), kwargs_={}):
    """
    TODO: recursive partial definitions

    kwargs = {}
    args = ([],)
    """
    import utool as ut
    # Rectify partials and whatnot
    true_args = args_
    true_kwargs = kwargs_
    true_func = func_
    if isinstance(func_, partial):
        true_func = func_.func
        if func_.args is not None:
            true_args = tuple(list(func_.args) + list(args_))
        if func_.keywords is not None:
            true_kwargs.update(func_.keywords)
    if ut.is_method(true_func):
        method = true_func
        true_func = method.im_func
        self = method.im_self
        true_args = tuple([self] + list(true_args))
    # Build up cachekey
    funcname = ut.get_funcname(true_func)
    kwdefaults = ut.get_kwdefaults(true_func, parse_source=False)
    # kwdefaults = ut.get_kwdefaults(true_func, parse_source=True)
    argnames = ut.get_argnames(true_func)
    key_argx = None
    key_kwds = None
    func = true_func  # NOQA
    args = true_args  # NOQA
    kwargs = true_kwargs  # NOQA
    args_key = ut.get_cfgstr_from_args(
        true_func, true_args, true_kwargs, key_argx, key_kwds, kwdefaults, argnames
    )
    cachekey = funcname + '(' + args_key + ')'
    return cachekey


def cachestr_repr(val):
    """
    Representation of an object as a cache string.
    """
    try:
        memview = memoryview(val)
        return memview.tobytes()
    except Exception:
        try:
            return to_json(val)
        except Exception:
            # SUPER HACK
            if (
                repr(val.__class__)
                == "<class 'wbia.control.IBEISControl.IBEISController'>"
            ):
                return val.get_dbname()


def get_cfgstr_from_args(
    func, args, kwargs, key_argx, key_kwds, kwdefaults, argnames, use_hash=None
):
    """
    Dev:
        argx = ['fdsf', '432443432432', 43423432, 'fdsfsd', 3.2, True]
        memlist = list(map(cachestr_repr, argx))

    Ignore:
        argx = key_argx[0]
        argval = args[argx]
        val = argval
        %timeit repr(argval)
        %timeit to_json(argval)
        %timeit utool.hashstr(to_json(argval))
        %timeit memoryview(argval)

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_cache import *  # NOQA
        >>> import utool as ut
        >>> use_hash = None
        >>> func = consensed_cfgstr
        >>> args = ('a', 'b', 'c', 'd')
        >>> kwargs = {}
        >>> key_argx = [0, 1, 2]
        >>> key_kwds = []
        >>> kwdefaults = ut.util_inspect.get_kwdefaults(func)
        >>> argnames = ut.util_inspect.get_argnames(func)
        >>> get_cfgstr_from_args(func, args, kwargs, key_argx, key_kwds, kwdefaults, argnames)

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_cache import *  # NOQA
        >>> import utool as ut
        >>> self = ut.LazyList
        >>> use_hash = None
        >>> func = self.append
        >>> args = ('a', 'b')
        >>> kwargs = {}
        >>> key_argx = [1]
        >>> key_kwds = []
        >>> kwdefaults = ut.util_inspect.get_kwdefaults(func)
        >>> argnames = ut.util_inspect.get_argnames(func)
        >>> get_cfgstr_from_args(func, args, kwargs, key_argx, key_kwds, kwdefaults, argnames)
    """
    # try:
    # fmt_str = '%s(%s)'
    import utool as ut
    hashstr_ = util_hash.hashstr27
    if key_argx is None:
        key_argx = list(range(len(args)))
    if key_kwds is None:
        key_kwds = ut.unique_ordered(list(kwdefaults.keys()) + list(kwargs.keys()))
    # def kwdval(key):
    #     return kwargs.get(key, kwdefaults.get(key, None))
    given_kwargs = ut.merge_dicts(kwdefaults, kwargs)
    arg_hashfmtstr = [argnames[argx] + '=(%s)' for argx in key_argx]
    # kw_hashfmtstr = [kwdefaults.get(key, '???') + '(%s)' for key in key_kwds]
    kw_hashfmtstr = [key + '=(%s)' for key in key_kwds]
    cfgstr_fmt = '_'.join(chain(arg_hashfmtstr, kw_hashfmtstr))
    # print('cfgstr_fmt = %r' % cfgstr_fmt)
    argrepr_iter = (cachestr_repr(args[argx]) for argx in key_argx)
    kwdrepr_iter = (cachestr_repr(given_kwargs[key]) for key in key_kwds)
    if use_hash is None:
        # print('conditional hashing args')
        argcfg_list = [
            hashstr_(argrepr) if len(argrepr) > 16 else argrepr
            for argrepr in argrepr_iter
        ]
        kwdcfg_list = [
            hashstr_(kwdrepr) if len(kwdrepr) > 16 else kwdrepr
            for kwdrepr in kwdrepr_iter
        ]
    elif use_hash is True:
        # print('hashing args')
        argcfg_list = [hashstr_(argrepr) for argrepr in argrepr_iter]
        kwdcfg_list = [hashstr_(kwdrepr) for kwdrepr in kwdrepr_iter]
    else:
        argcfg_list = list(argrepr_iter)
        kwdcfg_list = list(kwdrepr_iter)
    # print('formating args and kwargs')
    cfgstr = cfgstr_fmt % tuple(chain(argcfg_list, kwdcfg_list))
    # print('made cfgstr = %r' % cfgstr)
    return cfgstr


def cached_func(
    fname=None,
    cache_dir='default',
    appname='utool',
    key_argx=None,
    key_kwds=None,
    use_cache=None,
    verbose=None,
):
    r"""
    Wraps a function with a Cacher object that uses a hash of arguments as input

    Args:
        fname (str): file name (defaults to function name)
        cache_dir (unicode): (default = u'default')
        appname (unicode): (default = u'utool')
        key_argx (None): (default = None)
        key_kwds (None): (default = None)
        use_cache (bool): turns on disk based caching (default = None)

    CommandLine:
        python -m utool.util_cache --exec-cached_func

    Example:
        >>> # ENABLE_DOCTEST
        >>> import utool as ut
        >>> def costly_func(a, b, c='d', *args, **kwargs):
        ...     return ([a] * b, c, args, kwargs)
        >>> ans0 = costly_func(41, 3)
        >>> ans1 = costly_func(42, 3)
        >>> closure_ = ut.cached_func('costly_func', appname='utool_test',
        >>>                           key_argx=[0, 1])
        >>> efficient_func = closure_(costly_func)
        >>> ans2 = efficient_func(42, 3)
        >>> ans3 = efficient_func(42, 3)
        >>> ans4 = efficient_func(41, 3)
        >>> ans5 = efficient_func(41, 3)
        >>> assert ans1 == ans2
        >>> assert ans2 == ans3
        >>> assert ans5 == ans4
        >>> assert ans5 == ans0
        >>> assert ans1 != ans0
    """
    if verbose is None:
        verbose = VERBOSE_CACHE

    def cached_closure(func):
        from utool import util_decor
        import utool as ut
        fname_ = util_inspect.get_funcname(func) if fname is None else fname
        kwdefaults = util_inspect.get_kwdefaults(func)
        argnames = util_inspect.get_argnames(func)
        if ut.is_method(func):
            # ignore self for methods
            argnames = argnames[1:]
        cacher = Cacher(fname_, cache_dir=cache_dir, appname=appname, verbose=verbose)
        if use_cache is None:
            use_cache_ = not util_arg.get_argflag('--nocache-' + fname_)
        else:
            use_cache_ = use_cache
        # _dbgdict = dict(fname_=fname_, key_kwds=key_kwds, appname=appname,
        #                 key_argx=key_argx, use_cache_=use_cache_)

        # @functools.wraps(func)
        def cached_wraper(*args, **kwargs):
            """
            Cached Wrapper Function

            Additional Kwargs:
                use_cache (bool) : enables cache
            """
            try:
                if verbose > 2:
                    print('[util_cache] computing cached function fname_=%s' % (fname_,))
                # Implicitly adds use_cache to kwargs
                cfgstr = get_cfgstr_from_args(
                    func, args, kwargs, key_argx, key_kwds, kwdefaults, argnames
                )
                if util_cplat.WIN32:
                    # remove potentially invalid chars
                    cfgstr = '_' + util_hash.hashstr27(cfgstr)
                assert cfgstr is not None, 'cfgstr=%r cannot be None' % (cfgstr,)
                use_cache__ = kwargs.pop('use_cache', use_cache_)
                if use_cache__:
                    # Make cfgstr from specified input
                    data = cacher.tryload(cfgstr)
                    if data is not None:
                        return data
                # Cache missed; compute the function
                data = func(*args, **kwargs)
                # Cache save
                # if use_cache__:  # TODO: save_cache
                cacher.save(data, cfgstr)
                return data
            # except ValueError as ex:  # handle protocol error
            except Exception as ex:
                from utool import util_dbg
                _dbgdict2 = dict(key_argx=key_argx, lenargs=len(args), lenkw=len(kwargs))
                msg = '\n'.join(
                    [
                        '+--- UTOOL --- ERROR IN CACHED FUNCTION',
                        #'dbgdict = ' + utool.repr4(_dbgdict),
                        'dbgdict2 = ' + util_str.repr4(_dbgdict2),
                    ]
                )
                util_dbg.printex(ex, msg)
                raise

        # Give function a handle to the cacher object
        cached_wraper = util_decor.preserve_sig(cached_wraper, func)
        cached_wraper.cacher = cacher
        return cached_wraper

    return cached_closure


# --- Global Cache ---
def view_global_cache_dir(appname='default'):
    import utool
    dir_ = utool.get_global_cache_dir(appname=appname)
    utool.view_directory(dir_)


def get_global_cache_dir(appname='default', ensure=False):
    """Returns (usually) writable directory for an application cache"""
    if appname is None or appname == 'default':
        appname = get_default_appname()
    global_cache_dir = util_cplat.get_app_resource_dir(
        appname, meta_util_constants.global_cache_dname
    )
    if ensure:
        util_path.ensuredir(global_cache_dir)
    return global_cache_dir


def get_global_shelf_fpath(appname='default', ensure=False):
    """Returns the filepath to the global shelf"""
    global_cache_dir = get_global_cache_dir(appname, ensure=ensure)
    shelf_fpath = join(global_cache_dir, meta_util_constants.global_cache_fname)
    return shelf_fpath


def shelf_open(fpath):
    """
    allows for shelf to be used in with statements

    References:
        http://stackoverflow.com/questions/7489732/easiest-way-to-add-a-function-to-existing-class

    CommandLine:
        python -m utool.util_cache --test-shelf_open

    Example:
        >>> # DISABLE_DOCTEST
        >>> # UNSTABLE_DOCTEST
        >>> import utool as ut
        >>> fpath = ut.unixjoin(ut.ensure_app_resource_dir('utool'), 'testshelf.shelf')
        >>> with ut.shelf_open(fpath) as dict_:
        ...     print(ut.repr4(dict_))
    """
    return contextlib.closing(shelve.open(fpath))


# class YAWShelf(object):
#     def __init__(self, shelf_fpath):
#         self.shelf_fpath = shelf_fpath
#         import shelve
#         self.shelf = shelve.open(shelf_fpath)


class GlobalShelfContext(object):
    """older class. might need update"""

    def __init__(self, appname):
        self.appname = appname

    def __enter__(self):
        # self.shelf = get_global_shelf(self.appname)
        try:
            import dbm
            DBMError = dbm.error
        except Exception:
            DBMError = OSError
        try:
            shelf_fpath = get_global_shelf_fpath(self.appname, ensure=True)
            if VERBOSE:
                print('[cache] open: ' + shelf_fpath)
            self.shelf = shelve.open(shelf_fpath)
        except DBMError as ex:
            from utool import util_dbg
            util_dbg.printex(
                ex,
                'Failed opening shelf_fpath due to bad version, remove and retry',
                key_list=['shelf_fpath'],
            )
            import utool as ut
            ut.delete(shelf_fpath)
            self.shelf = shelve.open(shelf_fpath)
        except Exception as ex:
            from utool import util_dbg
            util_dbg.printex(ex, 'Failed opening shelf_fpath', key_list=['shelf_fpath'])
            raise
        return self.shelf

    def __exit__(self, type_, value, trace):
        self.shelf.close()
        if trace is not None:
            print('[cache] Error under GlobalShelfContext!: ' + str(value))
            return False  # return a falsey value on error
        # close_global_shelf(self.appname)


def global_cache_read(key, appname='default', **kwargs):
    with GlobalShelfContext(appname) as shelf:
        if 'default' in kwargs:
            return shelf.get(key, kwargs['default'])
        else:
            return shelf[key]


def global_cache_dump(appname='default'):
    shelf_fpath = get_global_shelf_fpath(appname)
    print('shelf_fpath = %r' % shelf_fpath)
    with GlobalShelfContext(appname) as shelf:
        print(util_str.repr4(shelf))


def global_cache_write(key, val, appname='default'):
    """Writes cache files to a safe place in each operating system"""
    with GlobalShelfContext(appname) as shelf:
        shelf[key] = val


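# Global cache round trip (a sketch; 'last_dbdir' is a hypothetical key):
#     global_cache_write('last_dbdir', '/path/to/db', appname='utool')
#     global_cache_read('last_dbdir', appname='utool', default=None)

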
def delete_global_cache(appname='default'):
    """Deletes the global cache file for the given application"""
    # close_global_shelf(appname)
    shelf_fpath = get_global_shelf_fpath(appname)
    util_path.remove_file(shelf_fpath, verbose=True, dryrun=False)


# import abc  # abstract base class
# import six
# @six.add_metaclass(abc.ABCMeta)
class Cachable(object):
    """
    Abstract base class that enables easy caching of object dictionaries.

    Subclasses must implement get_cfgstr()
    """
    ext = '.cPkl'
    # TODO: Capt'n Proto backend to replace pickle backend

    # @abc.abstractmethod
    def get_cfgstr(self):
        return getattr(self, 'cfgstr', 'DEFAULT')
        # return 'DEFAULT'
        # raise NotImplementedError('abstract method')

    # @abc.abstractmethod
    def get_prefix(self):
        # import utool as ut
        return self.__class__.__name__ + '_'
        # return ut.get_funcname(self.__class__) + '_'
        # raise NotImplementedError('abstract method')

    def get_cachedir(self, cachedir=None):
        if cachedir is None:
            if hasattr(self, 'cachedir'):
                cachedir = self.cachedir
            else:
                cachedir = '.'
        return cachedir

    def get_fname(self, cfgstr=None, ext=None):
        # convenience
        return basename(self.get_fpath('', cfgstr=cfgstr, ext=ext))

    def get_fpath(self, cachedir=None, cfgstr=None, ext=None):
        """
        Ignore:
            fname = _fname
            cfgstr = _cfgstr
        """
        _dpath = self.get_cachedir(cachedir)
        _fname = self.get_prefix()
        _cfgstr = self.get_cfgstr() if cfgstr is None else cfgstr
        _ext = self.ext if ext is None else ext
        fpath = _args2_fpath(_dpath, _fname, _cfgstr, _ext)
        return fpath

    def delete(
        self, cachedir=None, cfgstr=None, verbose=True or VERBOSE or util_arg.VERBOSE
    ):
        """
        deletes the cached result from the given directory
        """
        fpath = self.get_fpath(cachedir, cfgstr=cfgstr)
        if verbose:
            print('[Cachable] cache delete: %r' % (basename(fpath),))
        os.remove(fpath)

    @profile
    def save(
        self, cachedir=None, cfgstr=None, verbose=VERBOSE, quiet=QUIET, ignore_keys=None
    ):
        """
        saves query result to directory
        """
        fpath = self.get_fpath(cachedir, cfgstr=cfgstr)
        if verbose:
            print('[Cachable] cache save: %r' % (basename(fpath),))
        if hasattr(self, '__getstate__'):
            statedict = self.__getstate__()
        else:
            statedict = self.__dict__
        if ignore_keys is None:
            save_dict = statedict
        else:
            save_dict = {
                key: val
                for (key, val) in six.iteritems(statedict)
                if key not in ignore_keys
            }
        util_io.save_data(fpath, save_dict)
        return fpath
        # save_cache(cachedir, '', cfgstr, self.__dict__)
        # with open(fpath, 'wb') as file_:
        #     pickle.dump(self.__dict__, file_)

    def _unsafe_load(self, fpath, ignore_keys=None):
        loaded_dict = util_io.load_data(fpath)
        if ignore_keys is not None:
            for key in ignore_keys:
                if key in loaded_dict:
                    del loaded_dict[key]
        if hasattr(self, '__setstate__'):
            self.__setstate__(loaded_dict)
        else:
            self.__dict__.update(loaded_dict)
        # with open(fpath, 'rb') as file_:
        #     loaded_dict = pickle.load(file_)
        #     self.__dict__.update(loaded_dict)

    def glob_valid_targets(self, cachedir=None, partial_cfgstr=''):
        from utool import util_path
        prefix = self.get_prefix()
        pattern = prefix + '*' + partial_cfgstr + '*' + self.ext
        cachedir = self.get_cachedir(cachedir)
        valid_targets = util_path.glob(cachedir, pattern, recursive=False)
        return valid_targets

    def fuzzyload(self, cachedir=None, partial_cfgstr='', **kwargs):
        """
        Try and load from a partially specified configuration string
        """
        valid_targets = self.glob_valid_targets(cachedir, partial_cfgstr)
        if len(valid_targets) != 1:
            import utool as ut
            msg = 'need to further specify target. valid_targets=%s' % (
                ut.repr3(valid_targets)
            )
            raise ValueError(msg)
        fpath = valid_targets[0]
        self.load(fpath=fpath, **kwargs)

    @profile
    def load(
        self,
        cachedir=None,
        cfgstr=None,
        fpath=None,
        verbose=None,
        quiet=QUIET,
        ignore_keys=None,
    ):
        """
        Loads the result from the given database
        """
        if verbose is None:
            verbose = getattr(self, 'verbose', VERBOSE)
        if fpath is None:
            fpath = self.get_fpath(cachedir, cfgstr=cfgstr)
        if verbose:
            print('[Cachable] cache tryload: %r' % (basename(fpath),))
        try:
            self._unsafe_load(fpath, ignore_keys)
            if verbose:
                print('... self cache hit: %r' % (basename(fpath),))
        except ValueError as ex:
            import utool as ut
            msg = '[!Cachable] Cachable(%s) is likely corrupt' % (self.get_cfgstr())
            print('CORRUPT fpath = %s' % (fpath,))
            ut.printex(ex, msg, iswarning=True)
            raise
        # except BadZipFile as ex:
        except zipfile.error as ex:
            import utool as ut
            msg = '[!Cachable] Cachable(%s) has bad zipfile' % (self.get_cfgstr())
            print('CORRUPT fpath = %s' % (fpath,))
            ut.printex(ex, msg, iswarning=True)
            raise
            # if exists(fpath):
            #     #print('[Cachable] Removing corrupted file: %r' % fpath)
            #     #os.remove(fpath)
            #     raise hsexcept.HotsNeedsRecomputeError(msg)
            # else:
            #     raise Exception(msg)
        except IOError as ex:
            import utool as ut
            if not exists(fpath):
                msg = '... self cache miss: %r' % (basename(fpath),)
                if verbose:
                    print(msg)
                raise
            print('CORRUPT fpath = %s' % (fpath,))
            msg = '[!Cachable] Cachable(%s) is corrupt' % (self.get_cfgstr())
            ut.printex(ex, msg, iswarning=True)
            raise
        except Exception as ex:
            import utool as ut
            ut.printex(ex, 'unknown exception while loading query result')
            raise


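# Minimal Cachable subclass sketch (hypothetical `MyResult`; the `cfgstr`
# attribute feeds the default get_cfgstr):
#     class MyResult(Cachable):
#         def __init__(self, cfgstr='config1'):
#             self.cfgstr = cfgstr
#             self.data = None
#     self = MyResult()
#     self.data = [1, 2, 3]
#     self.save(cachedir='.')   # writes ./MyResult_config1.cPkl
#     fresh = MyResult()
#     fresh.load(cachedir='.')  # restores fresh.data from disk

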
def get_lru_cache(max_size=5):
    """
    Args:
        max_size (int):

    References:
        https://github.com/amitdev/lru-dict

    CommandLine:
        python -m utool.util_cache --test-get_lru_cache

    Example:
        >>> # DISABLE_DOCTEST
        >>> # UNSTABLE_DOCTEST
        >>> from utool.util_cache import *  # NOQA
        >>> import utool as ut  # NOQA
        >>> max_size = 5
        >>> # execute function
        >>> cache_obj = get_lru_cache(max_size)
        >>> cache_obj[1] = 1
        >>> cache_obj[2] = 2
        >>> cache_obj[3] = 3
        >>> cache_obj[4] = 4
        >>> cache_obj[5] = 5
        >>> cache_obj[6] = 6
        >>> # verify results
        >>> result = ut.repr2(dict(cache_obj), nl=False)
        >>> print(result)
        {2: 2, 3: 3, 4: 4, 5: 5, 6: 6}
    """
    USE_C_LRU = False
    if USE_C_LRU:
        import lru
        cache_obj = lru.LRU(max_size)
    else:
        cache_obj = LRUDict(max_size)
    return cache_obj


class LRUDict(object):
    """
    Pure python implementation for lru cache fallback

    References:
        http://www.kunxi.org/blog/2014/05/lru-cache-in-python/

    Args:
        max_size (int): (default = 5)

    Returns:
        LRUDict: cache_obj

    CommandLine:
        python -m utool.util_cache --test-LRUDict

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_cache import *  # NOQA
        >>> max_size = 5
        >>> self = LRUDict(max_size)
        >>> for count in range(0, 5):
        ...     self[count] = count
        >>> print(self)
        >>> self[0]
        >>> for count in range(5, 8):
        ...     self[count] = count
        >>> print(self)
        >>> del self[5]
        >>> assert 4 in self
        >>> result = ('self = %r' % (self,))
        >>> print(result)
        self = LRUDict({
            4: 4,
            0: 0,
            6: 6,
            7: 7,
        })
    """

    def __init__(self, max_size):
        self._max_size = max_size
        self._cache = collections.OrderedDict()

    def has_key(self, item):
        return item in self

    def __contains__(self, item):
        return item in self._cache

    def __delitem__(self, key):
        del self._cache[key]

    def __str__(self):
        import utool as ut
        return ut.repr4(self._cache, nl=False)

    def __repr__(self):
        import utool as ut
        return 'LRUDict(' + ut.repr4(self._cache) + ')'
        # return repr(self._cache)

    def __iter__(self):
        return iter(self._cache)

    def items(self):
        return self._cache.items()

    def keys(self):
        return self._cache.keys()

    def values(self):
        return self._cache.values()

    def iteritems(self):
        return self._cache.iteritems()

    def iterkeys(self):
        return self._cache.iterkeys()

    def itervalues(self):
        return self._cache.itervalues()

    def clear(self):
        return self._cache.clear()

    def __len__(self):
        return len(self._cache)

    def __getitem__(self, key):
        try:
            value = self._cache.pop(key)
            self._cache[key] = value
            return value
        except KeyError:
            raise

    def __setitem__(self, key, value):
        try:
            self._cache.pop(key)
        except KeyError:
            if len(self._cache) >= self._max_size:
                self._cache.popitem(last=False)
        self._cache[key] = value


def time_different_diskstores():
    """
    %timeit shelf_write_test()    # 15.1 ms per loop
    %timeit cPickle_write_test()  # 1.26 ms per loop

    %timeit shelf_read_test()     # 8.77 ms per loop
    %timeit cPickle_read_test()   # 2.4 ms per loop
    %timeit cPickle_read_test2()  # 2.35 ms per loop

    %timeit json_read_test()
    %timeit json_write_test()
    """
    import utool as ut
    import simplejson as json
    shelf_path = 'test.shelf'
    json_path = 'test.json'
    cpkl_path = 'test.pkl'
    size = 1000
    dict_ = {str(key): str(uuid.uuid4()) for key in range(size)}
    ut.delete(cpkl_path)
    ut.delete(json_path)
    ut.delete(shelf_path)

    def shelf_write_test():
        with ut.shelf_open(shelf_path) as shelf_dict:
            shelf_dict.update(dict_)

    def shelf_read_test():
        with ut.shelf_open(shelf_path) as shelf_dict:
            test = {key: val for key, val in six.iteritems(shelf_dict)}
        assert len(test) > 0

    def json_write_test():
        with open(json_path, 'wb') as outfile:
            json.dump(dict_, outfile)

    def cPickle_write_test():
        with open(cpkl_path, 'wb') as outfile:
            pickle.dump(dict_, outfile)

    def cPickle_read_test():
        with open(cpkl_path, 'rb') as outfile:
            test = {key: val for key, val in six.iteritems(pickle.load(outfile))}
        assert len(test) > 0

    def cPickle_read_test2():
        with open(cpkl_path, 'rb') as outfile:
            test = pickle.load(outfile)
        assert len(test) > 0

    shelf_write_test()
    shelf_read_test()
    # json_write_test()
    # json_read_test()
    cPickle_write_test()
    cPickle_read_test()
    cPickle_read_test2()


class KeyedDefaultDict(util_dict.DictLike):
    def __init__(self, default_func, *args, **kwargs):
        self._default_func = default_func
        self._args = args
        self._kwargs = kwargs
        self._internal = {}

    def setitem(self, key, value):
        self._internal[key] = value

    def getitem(self, key):
        if key not in self._internal:
            value = self._default_func(key, *self._args, **self._kwargs)
            self._internal[key] = value
        return self._internal[key]

    def keys(self):
        return self._internal.keys()

    def values(self):
        return self._internal.values()


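# KeyedDefaultDict behaves like collections.defaultdict except the factory
# receives the missing key (a sketch using getitem directly; bracket access
# works the same way to the extent util_dict.DictLike routes it to getitem):
#     squares = KeyedDefaultDict(lambda key: key ** 2)
#     squares.getitem(4)  # -> 16, computed and stored on first access
#     squares.getitem(4)  # -> 16, returned from the internal dict

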
# @six.add_metaclass(util_class.ReloadingMetaclass)
@util_class.reloadable_class
class LazyDict(object):
    # class LazyDict(collections.Mapping):
    """
    Hacky dictionary where values that are functions are counted as lazy

    CommandLine:
        python -m utool.util_cache --exec-LazyDict

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_cache import *  # NOQA
        >>> import utool as ut
        >>> self = ut.LazyDict()
        >>> self['foo'] = lambda: 5
        >>> self['bar'] = 4
        >>> try:
        >>>     self['foo'] = lambda: 9
        >>>     assert False, 'should not be able to override computable functions'
        >>> except ValueError:
        >>>     pass
        >>> self['biz'] = lambda: 9
        >>> d = {}
        >>> d.update(**self)
        >>> self['spam'] = lambda: 'eggs'
        >>> self.printinfo()
        >>> print(self.tostring(is_eager=False))
    """

    def __init__(
        self,
        other=None,
        is_eager=True,
        verbose=False,
        reprkw=None,
        mutable=False,
        **kwargs
    ):
        # Registered lazy evaluations
        self._eval_funcs = {}
        # Computed results
        self._stored_results = {}
        self.infer_lazy_vals_hack = True
        self._is_eager = is_eager
        self._verbose = verbose
        self.reprkw = dict(is_eager=False, nl=False)
        self._mutable = mutable
        if reprkw is not None:
            self.reprkw.update(**reprkw)
        if other is not None:
            self.update(other)
        if len(kwargs) > 0:
            self.update(kwargs)

    # --- direct interface
    def set_lazy_func(self, key, func):
        assert util_type.is_funclike(func), 'func must be a callable'
        # if key in self._stored_results:
        #     raise ValueError(
        #         ('Cannot add new lazy function for key=%r'
        #          ' that has been computed') % (key,))
        # if key in self._stored_results:
        if not self._mutable and key in self.reconstructable_keys():
            raise ValueError(('Cannot overwrite lazy function for key=%r') % (key,))
        self._eval_funcs[key] = func

    def setitem(self, key, value):
        # HACK, lazy funcs should all be registered;
        # this should always just set a value
        if not self._mutable and key in self.reconstructable_keys():
            raise ValueError(('Cannot overwrite lazy function for key=%r') % (key,))
        if self.infer_lazy_vals_hack and util_type.is_funclike(value):
            self.set_lazy_func(key, value)
        else:
            self._stored_results[key] = value

    def getitem(self, key, is_eager=None):
        if is_eager is None:
            is_eager = self._is_eager
        if is_eager:
            return self.eager_eval(key)
        else:
            return self.lazy_eval(key)

    def nocache_eval(self, key):
        """forces function evaluation"""
        func_ = self._eval_funcs[key]
        value = func_()
        return value

    def eager_eval(self, key):
        if key in self._stored_results:
            value = self._stored_results[key]
        else:
            if self._verbose:
                print('[util_cache] Evaluating key=%r' % (key,))
            value = self.nocache_eval(key)
            self._stored_results[key] = value
        return value

    def lazy_eval(self, key):
        if key in self._stored_results:
            value = self._stored_results[key]
        else:
            value = self._eval_funcs[key]
        return value

    def clear_evaluated(self):
        for key in list(self.evaluated_keys()):
            del self._stored_results[key]

    def clear_stored(self, keys=None):
        if keys is None:
            keys = list(self.stored_keys())
        for key in keys:
            del self._stored_results[key]

    def stored_keys(self):
        """keys whose vals have been explicitly set or evaluated"""
        return self._stored_results.keys()

    def reconstructable_keys(self):
        """only keys whose vals have been set with a backup func"""
        return set(self._eval_funcs.keys())

    def all_keys(self):
        return set(self.stored_keys()).union(set(self.reconstructable_keys()))

    def unevaluated_keys(self):
        """keys whose vals can be constructed but have not been"""
        return set(self.reconstructable_keys()) - set(self.stored_keys())

    def evaluated_keys(self):
        """only keys whose vals have been evaluated from a stored function"""
        return set(self.reconstructable_keys()) - set(self.unevaluated_keys())

    def nonreconstructable_keys(self):
        """only keys whose vals have been explicitly set without a backup func"""
        return set(self.all_keys()) - self.reconstructable_keys()

    def cached_keys(self):
        """keys whose vals are stored, whether set explicitly or evaluated"""
        return set(self.nonreconstructable_keys()).union(set(self.evaluated_keys()))

    def printinfo(self):
        print('nonreconstructable_keys = %s' % (self.nonreconstructable_keys(),))
        print('reconstructable_keys = %s' % (self.reconstructable_keys(),))
        print('evaluated_keys = %s' % (self.evaluated_keys(),))
        print('unevaluated_keys = %s' % (self.unevaluated_keys(),))

    def asdict(self, is_eager=None):
        dict_ = {key: self.getitem(key, is_eager) for key in self.keys()}
        return dict_

    def tostring(self, is_eager=None, keys=None, **kwargs):
        import utool as ut
        dict_ = self.asdict(is_eager=is_eager)

        class AwakeFaceRepr(object):
            def __repr__(self):
                return '!'
                # return '(o.o)'
                # return "٩(ˊᗜˋ*)و"

        class SleepFaceRepr(object):
            def __repr__(self):
                return 'z'
                # return '(-_-)'
                # return '(ᵕ≀ᵕ)'

        for key in self.evaluated_keys():
            # dict_[key] = '!'
            dict_[key] = AwakeFaceRepr()
        for key in self.unevaluated_keys():
            # dict_[key] = 'z'
            dict_[key] = SleepFaceRepr()
        if keys is not None:
            dict_ = ut.dict_subset(dict_, keys)
        return ut.repr2(dict_, **kwargs)
    # --- dict interface

    def get(self, key, *d):
        if len(d) > 1:
            raise ValueError('can only specify one default')
        elif len(d) == 1:
            # assert len(d) == 0, 'no support for default yet'
            if key not in self:
                return d[0]
        return self.getitem(key, self._is_eager)

    def update(self, dict_, **kwargs):
        for key, val in six.iteritems(dict_):
            self[key] = val
        for key, val in six.iteritems(kwargs):
            self[key] = val

    def keys(self):
        return self.all_keys()

    def values(self):
        return [self[key] for key in self.keys()]

    def items(self):
        return [(key, self[key]) for key in self.keys()]

    def __setitem__(self, key, value):
        self.setitem(key, value)

    def __getitem__(self, key):
        return self.get(key)

    def __delitem__(self, key):
        if key not in self.keys():
            raise KeyError(key)
        if key in self._eval_funcs:
            del self._eval_funcs[key]
        if key in self._stored_results:
            del self._stored_results[key]

    def __iter__(self):
        return iter(self.keys())

    def __len__(self):
        return len(self.keys())

    def __str__(self):
        return self.tostring()

    def __repr__(self):
        return self.tostring(**self.reprkw)

    # def __getstate__(self):
    #     state_dict = self.asdict()
    #     return state_dict

    # def __setstate__(self, state_dict):
    #     self._stored_results.update(state_dict)


@six.add_metaclass(util_class.ReloadingMetaclass)
class LazyList(object):
    """very hacky list implemented as a dictionary"""

    def __init__(self, **kwargs):
        self._hackstore = LazyDict(**kwargs)

    def __len__(self):
        return len(self._hackstore)

    def __getitem__(self, index):
        try:
            return self._hackstore[index]
        except KeyError:
            # raise ValueError('index=%r out of bounds' % (index,))
            raise ValueError(
                'index=%r out of bounds or error computing lazy value.' % (index,)
            )

    def append(self, item):
        self._hackstore[len(self._hackstore)] = item

    def tolist(self):
        return self._hackstore.values()

    def __iter__(self):
        for index in range(len(self)):
            yield self[index]


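# LazyList sketch: appended callables are treated as lazy values and only
# evaluated on first access (`expensive_fn` is a hypothetical zero-argument
# function):
#     lazy = LazyList()
#     lazy.append(expensive_fn)  # not called yet
#     lazy[0]                    # calls expensive_fn() and caches the result

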
if __name__ == '__main__':
    """
    CommandLine:
        python -c "import utool, utool.util_cache; utool.doctest_funcs(utool.util_cache)"
        python -m utool.util_cache
        python -m utool.util_cache --allexamples
    """
    import multiprocessing
    multiprocessing.freeze_support()  # for win32
    import utool as ut  # NOQA
    ut.doctest_funcs()