# -*- coding: utf-8 -*-
"""
This module needs serious refactoring and testing
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import shelve
import six
import uuid
import json
import codecs
import os
# import lru
# git+https://github.com/amitdev/lru-dict
# import atexit
# import inspect
import contextlib
import collections
from six.moves import cPickle as pickle # NOQA
from six.moves import range, zip
from os.path import join, normpath, basename, exists
from functools import partial
from itertools import chain
import zipfile
from utool import util_arg
from utool import util_hash
from utool import util_inject
from utool import util_path
from utool import util_io
from utool import util_str
from utool import util_cplat
from utool import util_inspect
from utool import util_list
from utool import util_class
from utool import util_type
# from utool import util_decor
from utool import util_dict
from utool._internal import meta_util_constants
# inject2 supplies the module-level ``print`` used throughout this file and the
# ``profile`` decorator applied to several functions below.
print, rrr, profile = util_inject.inject2(__name__)
# TODO: Remove globalness
# Verbosity switches, read once from util_arg when this module is imported.
VERBOSE = util_arg.VERBOSE
QUIET = util_arg.QUIET
VERBOSE_CACHE = util_arg.NOT_QUIET
# load_cache refuses to read from disk when this is False
# (presumably ``--nocache`` is a command line flag -- TODO confirm).
USE_CACHE = not util_arg.get_argflag('--nocache')
__APPNAME__ = meta_util_constants.default_appname # the global application name
class CacheMissException(Exception):
    """Raised when a requested key is not available in a cache."""
# class YACacher(object):
# @six.add_metaclass(util_class.ReloadingMetaclass)
@util_class.reloadable_class
class ShelfCacher(object):
    """
    Small key/value cache backed by a :mod:`shelve` file.

    Supports ``cacher[key]`` / ``cacher[key] = data`` as shorthand for
    :meth:`load` / :meth:`save`.

    Args:
        fpath (str): path handed to ``shelve.open``
        enabled (bool): when False no shelf is opened; every load is then a
            miss and every save is dropped (default = True)
    """

    def __init__(self, fpath, enabled=True):
        self.verbose = True
        # Define the attribute up front so that close() (also reached through
        # __del__) is safe even if shelve.open raises below.
        self.shelf = None
        if self.verbose:
            print('[shelfcache] initializing()')
        self.fpath = fpath
        if enabled:
            self.shelf = shelve.open(fpath)

    def __del__(self):
        self.close()

    def __getitem__(self, cachekey):
        return self.load(cachekey)

    def __setitem__(self, cachekey, data):
        return self.save(cachekey, data)

    @staticmethod
    def _rectify_key(cachekey):
        """
        Converts a key to the native ``str`` type, which is what shelve
        expects on both Python 2 (byte strings) and Python 3 (text).
        """
        if isinstance(cachekey, str):
            return cachekey
        if isinstance(cachekey, bytes):
            # Python 3 bytes -> text
            return cachekey.decode('ascii')
        # Python 2 unicode -> byte string
        return cachekey.encode('ascii')

    def keys(self):
        """Returns the keys stored in the shelf (empty when disabled)."""
        if self.shelf is None:
            return []
        return self.shelf.keys()

    def load(self, cachekey):
        """
        Returns the data stored under ``cachekey``.

        Raises:
            CacheMissException: if the cacher is disabled or the key is absent
        """
        if self.verbose:
            print('[shelfcache] loading %s' % (cachekey,))
        cachekey = self._rectify_key(cachekey)
        if self.shelf is None or cachekey not in self.shelf:
            raise CacheMissException(
                'Cache miss cachekey=%r self.fpath=%r' % (cachekey, self.fpath)
            )
        return self.shelf[cachekey]

    def save(self, cachekey, data):
        """Stores ``data`` under ``cachekey`` and syncs the shelf to disk."""
        if self.verbose:
            print('[shelfcache] saving %s' % (cachekey,))
        cachekey = self._rectify_key(cachekey)
        if self.shelf is not None:
            self.shelf[cachekey] = data
            self.shelf.sync()

    def clear(self):
        """Removes every entry from the shelf (no-op when disabled)."""
        if self.verbose:
            print('[shelfcache] clearing cache')
        if self.shelf is not None:
            self.shelf.clear()
            self.shelf.sync()

    def close(self):
        """Closes the underlying shelf if one was opened."""
        # getattr: this can run from __del__ on a partially built instance
        if getattr(self, 'verbose', False):
            print('[shelfcache] closing()')
        shelf = getattr(self, 'shelf', None)
        if shelf is not None:
            shelf.close()
def get_default_appname():
    """Returns the module-wide default application name (``__APPNAME__``)."""
    # Reading a module global needs no ``global`` declaration
    return __APPNAME__
def text_dict_read(fpath):
    """
    Reads a dictionary that was stored on disk as python source text.

    A file that cannot be opened is treated as an empty dictionary. Text that
    is not valid python syntax is reported and also yields an empty
    dictionary, unless ``util_arg.SUPER_STRICT`` is set, in which case the
    SyntaxError is re-raised.

    Args:
        fpath (str): path to the text file

    Returns:
        dict: the evaluated contents of the file
    """
    try:
        with open(fpath, 'r') as text_file:
            dict_text = text_file.read()
    except IOError:
        dict_text = '{}'
    # NOTE(review): eval executes whatever expression the file contains. Only
    # use this on files written by this application, never on untrusted input.
    try:
        return eval(dict_text, {}, {})
    except SyntaxError as ex:
        import utool as ut
        print(dict_text)
        ut.printex(ex, 'Bad Syntax', keys=['dict_text'])
        if util_arg.SUPER_STRICT:
            raise
    return {}
def text_dict_write(fpath, dict_):
    """
    Stores a dictionary on disk as human readable python source text.

    The text is produced by ``util_str.repr4`` and written with
    ``util_io.write_to``.

    FIXME: This broke on RoseMary's big dataset. Not sure why. It gave bad
    syntax. And the SyntaxError did not seem to be excepted.

    Args:
        fpath (str): destination file path
        dict_ (dict): dictionary to serialize
    """
    serialized = util_str.repr4(dict_, strvals=False)
    if VERBOSE:
        print('[cache] ' + str(serialized))
    util_io.write_to(fpath, serialized)
def consensed_cfgstr(prefix, cfgstr, max_len=128, cfgstr_hashlen=16):
    """
    Joins a prefix and a configuration string, hashing the configuration
    string when the combined length would exceed ``max_len``.

    Args:
        prefix (str): leading part of the name, always kept verbatim
        cfgstr (str): configuration string
        max_len (int): longest allowed unhashed result (default = 128)
        cfgstr_hashlen (int): length requested from ``hashstr27`` (default = 16)

    Returns:
        str: ``prefix + cfgstr`` or ``prefix`` followed by the hash
    """
    if len(prefix) + len(cfgstr) <= max_len:
        return prefix + cfgstr
    hashed_cfgstr = util_hash.hashstr27(cfgstr, hashlen=cfgstr_hashlen)
    # Hack for prettier names: keep exactly one underscore before the hash
    separator = '' if prefix.endswith('_') else '_'
    return prefix + separator + hashed_cfgstr
def _args2_fpath(dpath, fname, cfgstr, ext):
r"""
Ensures that the filename is not too long
Internal util_cache helper function
Windows MAX_PATH=260 characters
Absolute length is limited to 32,000 characters
Each filename component is limited to 255 characters
Args:
dpath (str):
fname (str):
cfgstr (str):
ext (str):
Returns:
str: fpath
CommandLine:
python -m utool.util_cache --test-_args2_fpath
Example:
>>> # ENABLE_DOCTEST
>>> from utool.util_cache import * # NOQA
>>> from utool.util_cache import _args2_fpath
>>> import utool as ut
>>> dpath = 'F:\\data\\work\\PZ_MTEST\\_ibsdb\\_wbia_cache'
>>> fname = 'normalizer_'
>>> cfgstr = u'PZ_MTEST_DSUUIDS((9)67j%dr%&bl%4oh4+)_QSUUIDS((9)67j%dr%&bl%4oh4+)zebra_plains_vsone_NN(single,K1+1,last,cks1024)_FILT(ratio<0.625;1.0,fg;1.0)_SV(0.01;2;1.57minIn=4,nRR=50,nsum,)_AGG(nsum)_FLANN(4_kdtrees)_FEATWEIGHT(ON,uselabel,rf)_FEAT(hesaff+sift_)_CHIP(sz450)'
>>> ext = '.cPkl'
>>> fpath = _args2_fpath(dpath, fname, cfgstr, ext)
>>> result = str(ut.ensure_unixslash(fpath))
>>> target = 'F:/data/work/PZ_MTEST/_ibsdb/_wbia_cache/normalizer_xfylfboirymmcpfg.cPkl'
>>> ut.assert_eq(result, target)
"""
if len(ext) > 0 and ext[0] != '.':
raise ValueError('Please be explicit and use a dot in ext')
max_len = 128
# should hashlen be larger?
cfgstr_hashlen = 16
prefix = fname
fname_cfgstr = consensed_cfgstr(
prefix, cfgstr, max_len=max_len, cfgstr_hashlen=cfgstr_hashlen
)
fpath = join(dpath, fname_cfgstr + ext)
fpath = normpath(fpath)
return fpath
def save_cache(dpath, fname, cfgstr, data, ext='.cPkl', verbose=None):
    """
    Writes ``data`` with ``util_io.save_data`` to a path derived from the
    directory, file name prefix, configuration string and extension.

    Returns:
        str: the path the data was written to
    """
    cache_fpath = _args2_fpath(dpath, fname, cfgstr, ext)
    util_io.save_data(cache_fpath, data, verbose=verbose)
    return cache_fpath
def load_cache(dpath, fname, cfgstr, ext='.cPkl', verbose=None, enabled=True):
    """
    Reads data with ``util_io.load_data`` from the path derived from the
    directory, file name prefix, configuration string and extension.

    Args:
        dpath (str): cache directory
        fname (str): file name prefix
        cfgstr (str): configuration string
        ext (str): file extension (default = '.cPkl')
        verbose (int): verbosity; defaults to ``VERBOSE_CACHE``
        enabled (bool): set to False to force a miss (default = True)

    Returns:
        object: the loaded data

    Raises:
        IOError: if caching is disabled, the file does not exist, or loading
            fails with EOFError / IOError / ImportError
    """
    if verbose is None:
        verbose = VERBOSE_CACHE
    if not (USE_CACHE and enabled):
        if verbose > 1:
            print(
                '[util_cache] ... cache disabled: dpath=%s cfgstr=%r'
                % (basename(dpath), cfgstr)
            )
        raise IOError(3, 'Cache Loading Is Disabled')
    fpath = _args2_fpath(dpath, fname, cfgstr, ext)
    if not exists(fpath):
        if verbose > 0:
            print(
                '[util_cache] ... cache does not exist: dpath=%r fname=%r cfgstr=%r'
                % (basename(dpath), fname, cfgstr)
            )
        raise IOError(2, 'No such file or directory: %r' % (fpath,))
    if verbose > 2:
        print(
            '[util_cache] ... cache exists: dpath=%r fname=%r cfgstr=%r'
            % (basename(dpath), fname, cfgstr)
        )
    import utool as ut
    nbytes = ut.get_file_nBytes(fpath)
    # NOTE: as written this is true exactly when verbose > 2
    big_verbose = (nbytes > 1e6 and verbose > 2) or verbose > 2
    if big_verbose:
        print('[util_cache] About to read file of size %s' % (ut.byte_str2(nbytes),))
    try:
        with ut.Timer(fpath, verbose=big_verbose and verbose > 3):
            data = util_io.load_data(fpath, verbose=verbose > 2)
    except (EOFError, IOError, ImportError) as ex:
        # Report load failures of these kinds uniformly as IOError
        print('CORRUPTED? fpath = %s' % (fpath,))
        if verbose > 1:
            print(
                '[util_cache] ... cache miss dpath=%s cfgstr=%r'
                % (basename(dpath), cfgstr)
            )
        raise IOError(str(ex))
    except Exception:
        print('CORRUPTED? fpath = %s' % (fpath,))
        raise
    if verbose > 2:
        print('[util_cache] ... cache hit')
    return data
def tryload_cache(dpath, fname, cfgstr, verbose=None):
    """
    Like ``load_cache`` but returns None instead of raising IOError.
    """
    try:
        data = load_cache(dpath, fname, cfgstr, verbose=verbose)
    except IOError:
        data = None
    return data
@profile
def tryload_cache_list(dpath, fname, cfgstr_list, verbose=False):
    """
    Tries to load one cached item per configuration string.

    Returns:
        tuple: (data_list, ismiss_list) where ``data_list[i]`` is None and
            ``ismiss_list[i]`` is True for every item that could not be loaded
    """
    data_list = []
    ismiss_list = []
    for cfgstr in cfgstr_list:
        data = tryload_cache(dpath, fname, cfgstr, verbose)
        data_list.append(data)
        ismiss_list.append(data is None)
    return data_list, ismiss_list
@profile
def tryload_cache_list_with_compute(
    use_cache, dpath, fname, cfgstr_list, compute_fn, *args
):
    """
    Loads a list of cached items and computes (then caches) the missing ones.

    ``compute_fn`` is called as ``compute_fn(ismiss_list, *args)``; its result
    is paired, in order, with the entries flagged as missing.

    Args:
        use_cache (bool): if exactly False nothing is loaded or saved and the
            result of ``compute_fn`` with all flags set is returned directly
        dpath (str): cache directory
        fname (str): file name prefix
        cfgstr_list (list): one configuration string per item
        compute_fn (callable): computes the data for the missing items
        *args: extra positional arguments forwarded to ``compute_fn``

    Returns:
        list: data_list
    """
    if use_cache is False:
        # Don't load or save, just compute
        ismiss_list = [True] * len(cfgstr_list)
        return compute_fn(ismiss_list, *args)

    data_list, ismiss_list = tryload_cache_list(
        dpath, fname, cfgstr_list, verbose=False
    )
    num_total = len(cfgstr_list)
    if not any(ismiss_list):
        print(
            '[cache] %d/%d cache hits for %s in %s'
            % (num_total, num_total, fname, util_path.tail(dpath))
        )
        return data_list

    # Compute missing values
    newdata_list = compute_fn(ismiss_list, *args)
    newcfgstr_list = util_list.compress(cfgstr_list, ismiss_list)
    index_list = util_list.list_where(ismiss_list)
    print(
        '[cache] %d/%d cache hits for %s in %s'
        % (num_total - len(index_list), num_total, fname, util_path.tail(dpath))
    )
    # Cache write
    for newcfgstr, newdata in zip(newcfgstr_list, newdata_list):
        save_cache(dpath, fname, newcfgstr, newdata, verbose=False)
    # Populate missing result
    for index, newdata in zip(index_list, newdata_list):
        data_list[index] = newdata
    return data_list
class Cacher(object):
    """
    Old non-inheritable version of Cachable.

    Loads and saves data for one file name prefix inside a cache directory.
    The configuration string distinguishing different versions of the data may
    be fixed in the constructor or supplied per call.

    Args:
        fname (str): file name prefix
        cfgstr (str): default configuration string (default = None)
        cache_dir (str): cache directory; 'default' uses the resource
            directory of ``appname``
        appname (str): application name (default = 'utool')
        ext (str): file extension (default = '.cPkl')
        verbose (int): verbosity; defaults to the module ``VERBOSE``
        enabled (bool): when False loads miss and saves do nothing
    """

    def __init__(
        self,
        fname,
        cfgstr=None,
        cache_dir='default',
        appname='utool',
        ext='.cPkl',
        verbose=None,
        enabled=True,
    ):
        if verbose is None:
            verbose = VERBOSE
        if cache_dir == 'default':
            cache_dir = util_cplat.get_app_resource_dir(appname)
        util_path.ensuredir(cache_dir)
        self.dpath = cache_dir
        self.fname = fname
        self.cfgstr = cfgstr
        self.verbose = verbose
        self.ext = ext
        self.enabled = enabled

    def _rectify_cfgstr(self, cfgstr=None):
        """
        Returns the cfgstr to use: the given one, else the one from the
        constructor, else an empty string (with a warning).
        """
        if cfgstr is None:
            cfgstr = self.cfgstr
        if cfgstr is None:
            import warnings
            warnings.warn('No cfgstr given in Cacher constructor or call')
            cfgstr = ''
        return cfgstr

    def get_fpath(self, cfgstr=None):
        """
        Returns the path of the cache file for ``cfgstr``.

        Uses the same cfgstr fallback as load / save, so the path always
        matches the file those methods read and write.
        """
        cfgstr = self._rectify_cfgstr(cfgstr)
        return _args2_fpath(self.dpath, self.fname, cfgstr, self.ext)

    def existing_versions(self):
        """
        Yields the paths of files in the cache directory whose names match
        ``<fname>_*<ext>``, i.e. data previously stored by this cacher under
        different cfgstr values.
        """
        import fnmatch
        pattern = self.fname + '_*' + self.ext
        try:
            names = os.listdir(self.dpath)
        except OSError:
            # unreadable or missing directory: nothing to report
            return
        for fname in fnmatch.filter(names, pattern):
            yield join(self.dpath, fname)

    def exists(self, cfgstr=None):
        """Returns True if the cache file for ``cfgstr`` exists."""
        return exists(self.get_fpath(cfgstr))

    def load(self, cfgstr=None):
        """
        Loads the cached data for ``cfgstr``.

        Raises:
            IOError: on a cache miss (see ``load_cache``)
        """
        cfgstr = self._rectify_cfgstr(cfgstr)
        assert self.fname is not None, 'no fname'
        assert self.dpath is not None, 'no dpath'
        # TODO: use the computed fpath from this object instead
        data = load_cache(
            self.dpath,
            self.fname,
            cfgstr,
            self.ext,
            verbose=self.verbose,
            enabled=self.enabled,
        )
        if self.verbose > 1:
            print('[cache] ... ' + self.fname + ' Cacher hit')
        return data

    def tryload(self, cfgstr=None):
        """
        Like load, but returns None if the load fails
        """
        cfgstr = self._rectify_cfgstr(cfgstr)
        if not self.enabled:
            if self.verbose > 0:
                print('[cache] ... %s Cacher disabled' % (self.fname))
            return None
        try:
            if self.verbose > 1:
                print('[cache] tryload fname=%s' % (self.fname,))
            return self.load(cfgstr)
        except IOError:
            if self.verbose > 0:
                print('[cache] ... %s Cacher miss' % (self.fname))
        return None

    def ensure(self, func, *args, **kwargs):
        """
        Returns the cached data, computing it with ``func(*args, **kwargs)``
        and saving it first if it could not be loaded.
        """
        data = self.tryload()
        if data is None:
            data = func(*args, **kwargs)
            self.save(data)
        return data

    def save(self, data, cfgstr=None):
        """Saves ``data`` under ``cfgstr``; does nothing when disabled."""
        if not self.enabled:
            return
        cfgstr = self._rectify_cfgstr(cfgstr)
        assert self.fname is not None, 'no fname'
        assert self.dpath is not None, 'no dpath'
        if self.verbose > 0:
            print('[cache] ... ' + self.fname + ' Cacher save')
        save_cache(self.dpath, self.fname, cfgstr, data, self.ext)
# @util_decor.memoize
def to_json(val, allow_pickle=False, pretty=False):
    r"""
    Converts a python object to a JSON string using the utool convention

    Args:
        val (object): object to serialize
        allow_pickle (bool): forwarded to ``make_utool_json_encoder``
            (default = False)
        pretty (bool): if True indent by 4 and use ``(',', ': ')``
            separators (default = False)

    Returns:
        str: json_str

    References:
        http://stackoverflow.com/questions/11561932/why-does-json-dumpslistnp

    CommandLine:
        python -m utool.util_cache --test-to_json
        python3 -m utool.util_cache --test-to_json

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_cache import *  # NOQA
        >>> import utool as ut
        >>> import numpy as np
        >>> import uuid
        >>> val = [
        >>>     '{"foo": "not a dict"}',
        >>>     1.3,
        >>>     [1],
        >>>     # {1: 1, 2: 2, 3: 3}, cant use integer keys
        >>>     {1, 2, 3},
        >>>     slice(1, None, 1),
        >>>     b'an ascii string',
        >>>     np.array([1, 2, 3]),
        >>>     ut.get_zero_uuid(),
        >>>     ut.LazyDict(x='fo'),
        >>>     ut.LazyDict,
        >>>     {'x': {'a', 'b', 'cde'}, 'y': [1]}
        >>> ]
        >>> #val = ut.LazyDict(x='fo')
        >>> allow_pickle = True
        >>> if not allow_pickle:
        >>>     val = val[:-2]
        >>> json_str = ut.to_json(val, allow_pickle=allow_pickle)
        >>> result = ut.repr3(json_str)
        >>> reload_val = ut.from_json(json_str, allow_pickle=allow_pickle)
        >>> # Make sure pickle doesnt happen by default
        >>> try:
        >>>     json_str = ut.to_json(val)
        >>>     assert False or not allow_pickle, 'expected a type error'
        >>> except TypeError:
        >>>     print('Correctly got type error')
        >>> try:
        >>>     json_str = ut.from_json(val)
        >>>     assert False, 'expected a type error'
        >>> except TypeError:
        >>>     print('Correctly got type error')
        >>> print(result)
        >>> print('original = ' + ut.repr3(val, nl=1))
        >>> print('reconstructed = ' + ut.repr3(reload_val, nl=1))
        >>> assert reload_val[6] == val[6].tolist()
        >>> assert reload_val[6] is not val[6]

    Example:
        >>> # test 3.7 safe uuid
        >>> import uuid
        >>> import utool as ut
        >>> ut.to_json([uuid.uuid4()])
    """
    json_kw = {'cls': make_utool_json_encoder(allow_pickle)}
    if pretty:
        json_kw.update(indent=4, separators=(',', ': '))
    return json.dumps(val, **json_kw)
def from_json(json_str, allow_pickle=False):
    """
    Decodes a JSON object specified in the utool convention

    Args:
        json_str (str): JSON text; on Python 3 bytes are decoded as utf-8
        allow_pickle (bool): forwarded to ``make_utool_json_encoder``
            (default = False)

    Returns:
        object: val

    CommandLine:
        python -m utool.util_cache from_json --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_cache import *  # NOQA
        >>> import utool as ut
        >>> json_str = 'just a normal string'
        >>> json_str = '["just a normal string"]'
        >>> allow_pickle = False
        >>> val = from_json(json_str, allow_pickle)
        >>> result = ('val = %s' % (ut.repr2(val),))
        >>> print(result)
    """
    if six.PY3 and isinstance(json_str, bytes):
        json_str = json_str.decode('utf-8')
    encoder_cls = make_utool_json_encoder(allow_pickle)
    return json.loads(json_str, object_hook=encoder_cls._json_object_hook)
def get_func_result_cachekey(func_, args_=tuple(), kwargs_=None):
    """
    Builds a cache key string of the form ``funcname(args_key)`` for calling
    ``func_`` with the given arguments.

    ``functools.partial`` objects are unwrapped one level: their stored
    positional arguments are prepended, and their stored keywords are merged
    underneath the call-time keywords (call-time values win, as they do when
    the partial is actually called). For methods, the bound instance is
    prepended to the positional arguments.

    The inputs are never modified.

    TODO: recursive partial definitions

    Args:
        func_ (callable): function, method or partial
        args_ (tuple): positional arguments (default = ())
        kwargs_ (dict): keyword arguments (default = None, meaning none)

    Returns:
        str: cachekey
    """
    import utool as ut
    # Rectify partials and whatnot. Work on copies so neither the caller's
    # dict nor a shared default is ever mutated.
    true_func = func_
    true_args = tuple(args_)
    true_kwargs = {} if kwargs_ is None else dict(kwargs_)
    if isinstance(func_, partial):
        true_func = func_.func
        if func_.args is not None:
            true_args = tuple(func_.args) + true_args
        if func_.keywords is not None:
            merged_kwargs = dict(func_.keywords)
            merged_kwargs.update(true_kwargs)
            true_kwargs = merged_kwargs
    if ut.is_method(true_func):
        method = true_func
        # __func__ / __self__ exist on both Python 2.6+ and Python 3
        # (im_func / im_self are Python 2 only)
        true_func = method.__func__
        true_args = (method.__self__,) + true_args
    # Build up cachekey
    funcname = ut.get_funcname(true_func)
    kwdefaults = ut.get_kwdefaults(true_func, parse_source=False)
    argnames = ut.get_argnames(true_func)
    key_argx = None
    key_kwds = None
    args_key = ut.get_cfgstr_from_args(
        true_func, true_args, true_kwargs, key_argx, key_kwds, kwdefaults, argnames
    )
    cachekey = funcname + '(' + args_key + ')'
    return cachekey
def cachestr_repr(val):
    """
    Representation of an object as a cache string.

    Tries, in order: the raw bytes of ``memoryview(val)``, then
    ``to_json(val)``. If both fail, objects whose class repr is the wbia
    IBEISController are represented by their ``get_dbname()``; anything else
    yields None.
    """
    try:
        return memoryview(val).tobytes()
    except Exception:
        pass
    try:
        return to_json(val)
    except Exception:
        pass
    # SUPER HACK
    controller_clsrepr = "<class 'wbia.control.IBEISControl.IBEISController'>"
    if repr(val.__class__) == controller_clsrepr:
        return val.get_dbname()
    return None
def get_cfgstr_from_args(
    func, args, kwargs, key_argx, key_kwds, kwdefaults, argnames, use_hash=None
):
    """
    Builds a configuration string of the form ``name=(value)_name=(value)``
    from selected positional and keyword arguments.

    Each value is rendered with ``cachestr_repr``. With ``use_hash=None`` a
    rendered value longer than 16 is replaced by its ``hashstr27``; with
    ``use_hash=True`` every value is hashed; any other value disables hashing.

    Args:
        func (callable): unused by this function
        args (tuple): positional arguments of the call
        kwargs (dict): keyword arguments of the call
        key_argx (list): indices into ``args`` to include (None = all)
        key_kwds (list): keyword names to include (None = all keys of
            ``kwdefaults`` and ``kwargs``)
        kwdefaults (dict): default keyword values of the function
        argnames (list): positional argument names, indexed like ``args``
        use_hash (bool): hashing mode, see above (default = None)

    Returns:
        str: cfgstr

    Dev:
        argx = ['fdsf', '432443432432', 43423432, 'fdsfsd', 3.2, True]
        memlist = list(map(cachestr_repr, argx))

    Ignore:
        argx = key_argx[0]
        argval = args[argx]
        val = argval
        %timeit repr(argval)
        %timeit to_json(argval)
        %timeit utool.hashstr(to_json(argval))
        %timeit memoryview(argval)

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_cache import *  # NOQA
        >>> import utool as ut
        >>> use_hash = None
        >>> func = consensed_cfgstr
        >>> args = ('a', 'b', 'c', 'd')
        >>> kwargs = {}
        >>> key_argx = [0, 1, 2]
        >>> key_kwds = []
        >>> kwdefaults = ut.util_inspect.get_kwdefaults(func)
        >>> argnames = ut.util_inspect.get_argnames(func)
        >>> get_cfgstr_from_args(func, args, kwargs, key_argx, key_kwds, kwdefaults, argnames)

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_cache import *  # NOQA
        >>> import utool as ut
        >>> self = ut.LazyList
        >>> use_hash = None
        >>> func = self.append
        >>> args = ('a', 'b')
        >>> kwargs = {}
        >>> key_argx = [1]
        >>> key_kwds = []
        >>> kwdefaults = ut.util_inspect.get_kwdefaults(func)
        >>> argnames = ut.util_inspect.get_argnames(func)
        >>> get_cfgstr_from_args(func, args, kwargs, key_argx, key_kwds, kwdefaults, argnames)
    """
    import utool as ut
    hashstr_ = util_hash.hashstr27
    if key_argx is None:
        key_argx = list(range(len(args)))
    if key_kwds is None:
        key_kwds = ut.unique_ordered(list(kwdefaults.keys()) + list(kwargs.keys()))
    given_kwargs = ut.merge_dicts(kwdefaults, kwargs)

    # One ``name=(%s)`` slot per selected argument, positional ones first
    fmt_parts = [argnames[argx] + '=(%s)' for argx in key_argx]
    fmt_parts.extend([key + '=(%s)' for key in key_kwds])
    cfgstr_fmt = '_'.join(fmt_parts)

    # Pick how a rendered value is condensed before being placed in its slot
    if use_hash is None:
        def _condense(repr_):
            return hashstr_(repr_) if len(repr_) > 16 else repr_
    elif use_hash is True:
        _condense = hashstr_
    else:
        def _condense(repr_):
            return repr_

    argcfg_list = [_condense(cachestr_repr(args[argx])) for argx in key_argx]
    kwdcfg_list = [_condense(cachestr_repr(given_kwargs[key])) for key in key_kwds]
    return cfgstr_fmt % tuple(argcfg_list + kwdcfg_list)
def cached_func(
    fname=None,
    cache_dir='default',
    appname='utool',
    key_argx=None,
    key_kwds=None,
    use_cache=None,
    verbose=None,
):
    r"""
    Wraps a function with a Cacher object

    uses a hash of arguments as input

    The wrapped function accepts an extra ``use_cache`` keyword that controls
    whether the cache is consulted for that call. It is consumed by the
    wrapper: it is neither passed to the function nor part of the cache key.

    Args:
        fname (str): file name (defaults to function name)
        cache_dir (unicode): (default = u'default')
        appname (unicode): (default = u'utool')
        key_argx (None): (default = None)
        key_kwds (None): (default = None)
        use_cache (bool): turns on disk based caching(default = None)
        verbose (int): verbosity; defaults to ``VERBOSE_CACHE``

    Returns:
        callable: decorator that produces the cached version of a function;
            the result exposes its Cacher as the ``cacher`` attribute

    CommandLine:
        python -m utool.util_cache --exec-cached_func

    Example:
        >>> # ENABLE_DOCTEST
        >>> import utool as ut
        >>> def costly_func(a, b, c='d', *args, **kwargs):
        ...     return ([a] * b, c, args, kwargs)
        >>> ans0 = costly_func(41, 3)
        >>> ans1 = costly_func(42, 3)
        >>> closure_ = ut.cached_func('costly_func', appname='utool_test',
        >>>                           key_argx=[0, 1])
        >>> efficient_func = closure_(costly_func)
        >>> ans2 = efficient_func(42, 3)
        >>> ans3 = efficient_func(42, 3)
        >>> ans4 = efficient_func(41, 3)
        >>> ans5 = efficient_func(41, 3)
        >>> assert ans1 == ans2
        >>> assert ans2 == ans3
        >>> assert ans5 == ans4
        >>> assert ans5 == ans0
        >>> assert ans1 != ans0
    """
    if verbose is None:
        verbose = VERBOSE_CACHE

    def cached_closure(func):
        from utool import util_decor
        import utool as ut
        fname_ = util_inspect.get_funcname(func) if fname is None else fname
        kwdefaults = util_inspect.get_kwdefaults(func)
        argnames = util_inspect.get_argnames(func)
        if ut.is_method(func):
            # ignore self for methods
            argnames = argnames[1:]
        cacher = Cacher(fname_, cache_dir=cache_dir, appname=appname, verbose=verbose)
        if use_cache is None:
            use_cache_ = not util_arg.get_argflag('--nocache-' + fname_)
        else:
            use_cache_ = use_cache

        def cached_wraper(*args, **kwargs):
            """
            Cached Wrapper Function

            Additional Kwargs:
                use_cache (bool) : enables cache
            """
            try:
                if verbose > 2:
                    print('[util_cache] computing cached function fname_=%s' % (fname_,))
                # use_cache belongs to the wrapper. Remove it before the
                # cfgstr is built so that it cannot change the cache key.
                use_cache__ = kwargs.pop('use_cache', use_cache_)
                cfgstr = get_cfgstr_from_args(
                    func, args, kwargs, key_argx, key_kwds, kwdefaults, argnames
                )
                if util_cplat.WIN32:
                    # remove potentially invalid chars
                    cfgstr = '_' + util_hash.hashstr27(cfgstr)
                assert cfgstr is not None, 'cfgstr=%r cannot be None' % (cfgstr,)
                if use_cache__:
                    # Make cfgstr from specified input
                    data = cacher.tryload(cfgstr)
                    if data is not None:
                        return data
                # Cached missed compute function
                data = func(*args, **kwargs)
                # Cache save (done even when use_cache is off)
                cacher.save(data, cfgstr)
                return data
            except Exception as ex:
                from utool import util_dbg
                _dbgdict2 = dict(key_argx=key_argx, lenargs=len(args), lenkw=len(kwargs))
                msg = '\n'.join(
                    [
                        '+--- UTOOL --- ERROR IN CACHED FUNCTION',
                        'dbgdict2 = ' + util_str.repr4(_dbgdict2),
                    ]
                )
                util_dbg.printex(ex, msg)
                raise

        # Give function a handle to the cacher object
        cached_wraper = util_decor.preserve_sig(cached_wraper, func)
        cached_wraper.cacher = cacher
        return cached_wraper

    return cached_closure
# --- Global Cache ---
def view_global_cache_dir(appname='default'):
    """Opens the global cache directory of ``appname`` with ``utool.view_directory``."""
    import utool
    utool.view_directory(utool.get_global_cache_dir(appname=appname))
def get_global_cache_dir(appname='default', ensure=False):
    """
    Returns (usually) writable directory for an application cache

    Args:
        appname (str): application name; None or 'default' selects the
            module-wide default application name
        ensure (bool): if True the directory is passed to
            ``util_path.ensuredir`` (default = False)

    Returns:
        str: global_cache_dir
    """
    wants_default = appname is None or appname == 'default'
    resolved_appname = get_default_appname() if wants_default else appname
    global_cache_dir = util_cplat.get_app_resource_dir(
        resolved_appname, meta_util_constants.global_cache_dname
    )
    if ensure:
        util_path.ensuredir(global_cache_dir)
    return global_cache_dir
def get_global_shelf_fpath(appname='default', ensure=False):
    """
    Returns the filepath to the global shelf

    The path is the global cache file name inside the global cache directory
    of ``appname``; ``ensure`` is forwarded to ``get_global_cache_dir``.
    """
    cache_dpath = get_global_cache_dir(appname, ensure=ensure)
    return join(cache_dpath, meta_util_constants.global_cache_fname)
def shelf_open(fpath):
    """
    Opens a shelf wrapped in ``contextlib.closing`` so that it can be used in
    a with statement and is closed on exit.

    References:
        http://stackoverflow.com/questions/7489732/easiest-way-to-add-a-function-to-existing-class

    CommandLine:
        python -m utool.util_cache --test-shelf_open

    Example:
        >>> # DISABLE_DOCTEST
        >>> # UNSTABLE_DOCTEST
        >>> import utool as ut
        >>> fpath = ut.unixjoin(ut.ensure_app_resource_dir('utool'), 'testshelf.shelf')
        >>> with ut.shelf_open(fpath) as dict_:
        ...     print(ut.repr4(dict_))
    """
    shelf = shelve.open(fpath)
    return contextlib.closing(shelf)
# class YAWShelf(object):
# def __init__(self, shelf_fpath):
# self.shelf_fpath = shelf_fpath
# import shelve
# self.shelf = shelve.open(shelf_fpath)
class GlobalShelfContext(object):
    """
    Context manager that opens the global shelf of an application on entry
    and closes it on exit.

    older class. might need update

    Args:
        appname (str): application whose global shelf is opened
    """

    def __init__(self, appname):
        self.appname = appname

    def __enter__(self):
        """
        Opens and returns the shelf.

        If opening fails with a dbm error the shelf file is deleted and
        opening is retried once; any other error is reported and re-raised.
        """
        try:
            import dbm
            DBMError = dbm.error
        except Exception:
            DBMError = OSError
        # Resolve the path before the try block: the handlers below use it,
        # and a failure here (e.g. while creating the directory) must not be
        # mistaken for a corrupt shelf.
        shelf_fpath = get_global_shelf_fpath(self.appname, ensure=True)
        try:
            if VERBOSE:
                print('[cache] open: ' + shelf_fpath)
            self.shelf = shelve.open(shelf_fpath)
        except DBMError as ex:
            from utool import util_dbg
            util_dbg.printex(
                ex,
                'Failed opening shelf_fpath due to bad version, remove and retry',
                key_list=['shelf_fpath'],
            )
            import utool as ut
            ut.delete(shelf_fpath)
            self.shelf = shelve.open(shelf_fpath)
        except Exception as ex:
            from utool import util_dbg
            util_dbg.printex(ex, 'Failed opening shelf_fpath', key_list=['shelf_fpath'])
            raise
        return self.shelf

    def __exit__(self, type_, value, trace):
        """Closes the shelf; exceptions from the with block are not suppressed."""
        self.shelf.close()
        if trace is not None:
            print('[cache] Error under GlobalShelfContext!: ' + str(value))
            return False  # return a falsey value on error
def global_cache_read(key, appname='default', **kwargs):
    """
    Reads a value from the global shelf of ``appname``.

    Args:
        key (str): key to look up
        appname (str): application name (default = 'default')
        **kwargs: if ``default`` is given it is returned for a missing key;
            otherwise a missing key raises KeyError
    """
    with GlobalShelfContext(appname) as shelf:
        if 'default' not in kwargs:
            return shelf[key]
        return shelf.get(key, kwargs['default'])
def global_cache_dump(appname='default'):
    """Prints the path and the contents of the global shelf of ``appname``."""
    print('shelf_fpath = %r' % get_global_shelf_fpath(appname))
    with GlobalShelfContext(appname) as shelf:
        shelf_text = util_str.repr4(shelf)
        print(shelf_text)
def global_cache_write(key, val, appname='default'):
    """Stores ``val`` under ``key`` in the global shelf of ``appname``."""
    with GlobalShelfContext(appname) as global_shelf:
        global_shelf[key] = val
def delete_global_cache(appname='default'):
    """Removes the global shelf file of ``appname`` from disk."""
    util_path.remove_file(
        get_global_shelf_fpath(appname), verbose=True, dryrun=False
    )
# import abc # abstract base class
# import six
# @six.add_metaclass(abc.ABCMeta)
class Cachable(object):
    """
    Abstract base class.

    Enables easy caching of an object's state dictionary on disk. Subclasses
    are expected to provide a configuration string, either through a
    ``cfgstr`` attribute or by overriding ``get_cfgstr()``.
    """

    ext = '.cPkl'  # TODO: Capt'n Proto backend to replace pickle backend

    def get_cfgstr(self):
        """Returns ``self.cfgstr`` if it exists, otherwise 'DEFAULT'."""
        return getattr(self, 'cfgstr', 'DEFAULT')

    def get_prefix(self):
        """Returns the file name prefix: the class name plus an underscore."""
        return self.__class__.__name__ + '_'

    def get_cachedir(self, cachedir=None):
        """
        Returns the given cachedir, else ``self.cachedir`` if it exists,
        else the current directory.
        """
        if cachedir is not None:
            return cachedir
        return self.cachedir if hasattr(self, 'cachedir') else '.'

    def get_fname(self, cfgstr=None, ext=None):
        """Convenience: returns only the file name part of the cache path."""
        return basename(self.get_fpath('', cfgstr=cfgstr, ext=ext))

    def get_fpath(self, cachedir=None, cfgstr=None, ext=None):
        """
        Returns the cache file path. Arguments left as None fall back to
        ``get_cachedir()``, ``get_cfgstr()`` and the class ``ext``.

        Ignore:
            fname = _fname
            cfgstr = _cfgstr
        """
        _cfgstr = cfgstr if cfgstr is not None else self.get_cfgstr()
        _ext = ext if ext is not None else self.ext
        return _args2_fpath(
            self.get_cachedir(cachedir), self.get_prefix(), _cfgstr, _ext
        )

    def delete(
        self, cachedir=None, cfgstr=None, verbose=True or VERBOSE or util_arg.VERBOSE
    ):
        """
        Removes the cache file from disk (``os.remove``; raises if the file
        does not exist).
        """
        # NOTE: the default for verbose always evaluates to True
        fpath = self.get_fpath(cachedir, cfgstr=cfgstr)
        if verbose:
            print('[Cachable] cache delete: %r' % (basename(fpath),))
        os.remove(fpath)

    @profile
    def save(
        self, cachedir=None, cfgstr=None, verbose=VERBOSE, quiet=QUIET, ignore_keys=None
    ):
        """
        Saves the state of this object to its cache file.

        The state is ``__getstate__()`` when available, otherwise
        ``__dict__``; keys listed in ``ignore_keys`` are left out.

        Returns:
            str: fpath the state was written to
        """
        fpath = self.get_fpath(cachedir, cfgstr=cfgstr)
        if verbose:
            print('[Cachable] cache save: %r' % (basename(fpath),))
        statedict = (
            self.__getstate__() if hasattr(self, '__getstate__') else self.__dict__
        )
        save_dict = statedict
        if ignore_keys is not None:
            save_dict = dict(
                (key, val)
                for (key, val) in six.iteritems(statedict)
                if key not in ignore_keys
            )
        util_io.save_data(fpath, save_dict)
        return fpath

    def _unsafe_load(self, fpath, ignore_keys=None):
        """
        Loads the state stored at ``fpath`` into this object without any
        error handling. Keys in ``ignore_keys`` are discarded first. The state
        goes through ``__setstate__`` when available, otherwise it updates
        ``__dict__``.
        """
        loaded_dict = util_io.load_data(fpath)
        for key in ignore_keys or ():
            loaded_dict.pop(key, None)
        if hasattr(self, '__setstate__'):
            self.__setstate__(loaded_dict)
        else:
            self.__dict__.update(loaded_dict)

    def glob_valid_targets(self, cachedir=None, partial_cfgstr=''):
        """
        Returns the cache files in the cache directory whose names match
        ``<prefix>*<partial_cfgstr>*<ext>``.
        """
        from utool import util_path
        pattern = self.get_prefix() + '*' + partial_cfgstr + '*' + self.ext
        cachedir = self.get_cachedir(cachedir)
        return util_path.glob(cachedir, pattern, recursive=False)

    def fuzzyload(self, cachedir=None, partial_cfgstr='', **kwargs):
        """
        Try and load from a partially specified configuration string

        Raises:
            ValueError: unless exactly one cache file matches
        """
        valid_targets = self.glob_valid_targets(cachedir, partial_cfgstr)
        if len(valid_targets) != 1:
            import utool as ut
            msg = 'need to further specify target. valid_targets=%s' % (
                ut.repr3(valid_targets)
            )
            raise ValueError(msg)
        self.load(fpath=valid_targets[0], **kwargs)

    @profile
    def load(
        self,
        cachedir=None,
        cfgstr=None,
        fpath=None,
        verbose=None,
        quiet=QUIET,
        ignore_keys=None,
    ):
        """
        Loads the cached state into this object.

        Every failure is reported and then re-raised; an IOError for a file
        that does not exist is treated as an ordinary cache miss and only
        reported when verbose.

        Args:
            cachedir (str): cache directory override
            cfgstr (str): configuration string override
            fpath (str): explicit file to load; overrides the computed path
            verbose (bool): defaults to ``self.verbose`` or the module VERBOSE
            quiet (bool): unused
            ignore_keys (list): keys to drop from the loaded state
        """
        if verbose is None:
            verbose = getattr(self, 'verbose', VERBOSE)
        if fpath is None:
            fpath = self.get_fpath(cachedir, cfgstr=cfgstr)
        if verbose:
            print('[Cachable] cache tryload: %r' % (basename(fpath),))
        try:
            self._unsafe_load(fpath, ignore_keys)
            if verbose:
                print('... self cache hit: %r' % (basename(fpath),))
        except ValueError as ex:
            import utool as ut
            msg = '[!Cachable] Cachable(%s) is likely corrupt' % (self.get_cfgstr())
            print('CORRUPT fpath = %s' % (fpath,))
            ut.printex(ex, msg, iswarning=True)
            raise
        except zipfile.error as ex:
            import utool as ut
            msg = '[!Cachable] Cachable(%s) has bad zipfile' % (self.get_cfgstr())
            print('CORRUPT fpath = %s' % (fpath,))
            ut.printex(ex, msg, iswarning=True)
            raise
        except IOError as ex:
            import utool as ut
            if not exists(fpath):
                # plain cache miss, not corruption
                msg = '... self cache miss: %r' % (basename(fpath),)
                if verbose:
                    print(msg)
                raise
            print('CORRUPT fpath = %s' % (fpath,))
            msg = '[!Cachable] Cachable(%s) is corrupt' % (self.get_cfgstr())
            ut.printex(ex, msg, iswarning=True)
            raise
        except Exception as ex:
            import utool as ut
            ut.printex(ex, 'unknown exception while loading query result')
            raise
def get_lru_cache(max_size=5):
    """
    Returns a least-recently-used cache object holding at most ``max_size``
    items. The pure python ``LRUDict`` is always used because the C backed
    ``lru.LRU`` is switched off by a hard coded flag.

    Args:
        max_size (int):

    References:
        https://github.com/amitdev/lru-dict

    CommandLine:
        python -m utool.util_cache --test-get_lru_cache

    Example:
        >>> # DISABLE_DOCTEST
        >>> # UNSTABLE_DOCTEST
        >>> from utool.util_cache import *  # NOQA
        >>> import utool as ut  # NOQA
        >>> max_size = 5
        >>> # execute function
        >>> cache_obj = get_lru_cache(max_size)
        >>> cache_obj[1] = 1
        >>> cache_obj[2] = 2
        >>> cache_obj[3] = 3
        >>> cache_obj[4] = 4
        >>> cache_obj[5] = 5
        >>> cache_obj[6] = 6
        >>> # verify results
        >>> result = ut.repr2(dict(cache_obj), nl=False)
        >>> print(result)
        {2: 2, 3: 3, 4: 4, 5: 5, 6: 6}
    """
    USE_C_LRU = False
    if not USE_C_LRU:
        return LRUDict(max_size)
    import lru
    return lru.LRU(max_size)
class LRUDict(object):
    """
    Pure python implementation for lru cache fallback

    Items are kept in an ordered dictionary from least to most recently used.
    Reading or writing a key makes it the most recent one; adding a new key
    to a full cache evicts the least recently used item.

    References:
        http://www.kunxi.org/blog/2014/05/lru-cache-in-python/

    Args:
        max_size (int): (default = 5)

    Returns:
        LRUDict: cache_obj

    CommandLine:
        python -m utool.util_cache --test-LRUDict

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_cache import *  # NOQA
        >>> max_size = 5
        >>> self = LRUDict(max_size)
        >>> for count in range(0, 5):
        ...     self[count] = count
        >>> print(self)
        >>> self[0]
        >>> for count in range(5, 8):
        ...     self[count] = count
        >>> print(self)
        >>> del self[5]
        >>> assert 4 in self
        >>> result = ('self = %r' % (self,))
        >>> print(result)
        self = LRUDict({
            4: 4,
            0: 0,
            6: 6,
            7: 7,
        })
    """

    def __init__(self, max_size):
        self._max_size = max_size
        self._cache = collections.OrderedDict()

    def has_key(self, item):
        return item in self

    def __contains__(self, item):
        # Membership tests do not count as a use of the item
        return item in self._cache

    def __delitem__(self, key):
        del self._cache[key]

    def __str__(self):
        import utool as ut
        return ut.repr4(self._cache, nl=False)

    def __repr__(self):
        import utool as ut
        return 'LRUDict(' + ut.repr4(self._cache) + ')'

    def __iter__(self):
        return iter(self._cache)

    def items(self):
        return self._cache.items()

    def keys(self):
        return self._cache.keys()

    def values(self):
        return self._cache.values()

    # The iter* methods are built on items/keys/values so that they also work
    # on Python 3, where OrderedDict has no iteritems/iterkeys/itervalues.
    def iteritems(self):
        return iter(self._cache.items())

    def iterkeys(self):
        return iter(self._cache.keys())

    def itervalues(self):
        return iter(self._cache.values())

    def clear(self):
        return self._cache.clear()

    def __len__(self):
        return len(self._cache)

    def __getitem__(self, key):
        # Remove and reinsert to mark the key as most recently used.
        # A missing key raises KeyError from pop.
        value = self._cache.pop(key)
        self._cache[key] = value
        return value

    def __setitem__(self, key, value):
        try:
            # Existing key: drop it so that the reinsert moves it to the end
            self._cache.pop(key)
        except KeyError:
            # New key: make room by evicting the least recently used item
            if len(self._cache) >= self._max_size:
                self._cache.popitem(last=False)
        self._cache[key] = value
def time_different_diskstores():
    """
    Defines small read/write helpers for shelve, pickle and json stores and
    runs the shelve and pickle ones once.  The helpers are meant to be timed
    interactively; timings previously recorded by the author:

    %timeit shelf_write_test()    # 15.1 ms per loop
    %timeit cPickle_write_test()  # 1.26 ms per loop
    %timeit shelf_read_test()     # 8.77 ms per loop
    %timeit cPickle_read_test()   # 2.4 ms per loop
    %timeit cPickle_read_test2()  # 2.35 ms per loop
    %timeit json_read_test()
    %timeit json_write_test()

    Note:
        Writes ``test.shelf``, ``test.json`` and ``test.pkl`` relative to the
        current working directory after deleting any existing copies.
    """
    import utool as ut
    import simplejson as json
    shelf_path = 'test.shelf'
    json_path = 'test.json'
    cpkl_path = 'test.pkl'
    size = 1000
    dict_ = {str(key): str(uuid.uuid4()) for key in range(size)}
    # Start from a clean slate so the write tests create fresh stores
    ut.delete(cpkl_path)
    ut.delete(json_path)
    ut.delete(shelf_path)

    def shelf_write_test():
        with ut.shelf_open(shelf_path) as shelf_dict:
            shelf_dict.update(dict_)

    def shelf_read_test():
        with ut.shelf_open(shelf_path) as shelf_dict:
            test = {key: val for key, val in six.iteritems(shelf_dict)}
        assert len(test) > 0

    def json_write_test():
        # json produces text, so the file must be opened in text mode
        # ('wb' raises TypeError on Python 3)
        with open(json_path, 'w') as outfile:
            json.dump(dict_, outfile)

    def json_read_test():
        with open(json_path, 'r') as infile:
            test = json.load(infile)
        assert len(test) > 0

    def cPickle_write_test():
        with open(cpkl_path, 'wb') as outfile:
            pickle.dump(dict_, outfile)

    def cPickle_read_test():
        with open(cpkl_path, 'rb') as outfile:
            test = {key: val for key, val in six.iteritems(pickle.load(outfile))}
        assert len(test) > 0

    def cPickle_read_test2():
        with open(cpkl_path, 'rb') as outfile:
            test = pickle.load(outfile)
        assert len(test) > 0

    # Each read test depends on the matching write test having run first
    shelf_write_test()
    shelf_read_test()
    # json_write_test()
    # json_read_test()
    cPickle_write_test()
    cPickle_read_test()
    cPickle_read_test2()
class KeyedDefaultDict(util_dict.DictLike):
    """
    Dictionary whose missing values are produced from the key itself.

    On the first ``getitem`` of an unknown key the value is computed as
    ``default_func(key, *args, **kwargs)`` and remembered for later lookups.

    Args:
        default_func (callable): called with the missing key followed by the
            extra ``args`` / ``kwargs`` given at construction
        *args: extra positional arguments forwarded to ``default_func``
        **kwargs: extra keyword arguments forwarded to ``default_func``
    """

    def __init__(self, default_func, *args, **kwargs):
        self._internal = {}
        self._default_func = default_func
        self._args = args
        self._kwargs = kwargs

    def setitem(self, key, value):
        self._internal[key] = value

    def getitem(self, key):
        store = self._internal
        if key in store:
            return store[key]
        # First access: build the default from the key and remember it
        store[key] = self._default_func(key, *self._args, **self._kwargs)
        return store[key]

    def keys(self):
        return self._internal.keys()

    def values(self):
        return self._internal.values()
# @six.add_metaclass(util_class.ReloadingMetaclass)
@util_class.reloadable_class
class LazyDict(object):
    # class LazyDict(collections.Mapping):
    """
    Hacky dictionary where values that are functions are counted as lazy

    Function-like values assigned to the dict are registered as lazy
    evaluators instead of being stored.  With eager access (the default) the
    evaluator is called the first time its key is read and the result is
    stored; with lazy access an unevaluated key returns the function itself.

    Args:
        other (dict): initial items passed to ``update`` (default = None)
        is_eager (bool): default access mode used by ``get`` / ``[]``
            (default = True)
        verbose (bool): print a message whenever a key is evaluated
            (default = False)
        reprkw (dict): overrides for the keyword arguments that ``__repr__``
            passes to ``tostring`` (default = None)
        mutable (bool): if True registered lazy functions may be overwritten,
            otherwise doing so raises ValueError (default = False)
        **kwargs: additional initial items

    CommandLine:
        python -m utool.util_cache --exec-LazyDict

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_cache import *  # NOQA
        >>> import utool as ut
        >>> self = ut.LazyDict()
        >>> self['foo'] = lambda: 5
        >>> self['bar'] = 4
        >>> try:
        >>>     self['foo'] = lambda: 9
        >>>     assert False, 'should not be able to override computable functions'
        >>> except ValueError:
        >>>     pass
        >>> self['biz'] = lambda: 9
        >>> d = {}
        >>> d.update(**self)
        >>> self['spam'] = lambda: 'eggs'
        >>> self.printinfo()
        >>> print(self.tostring(is_eager=False))
    """

    def __init__(
        self,
        other=None,
        is_eager=True,
        verbose=False,
        reprkw=None,
        mutable=False,
        **kwargs
    ):
        # Registered lazy evaluations
        self._eval_funcs = {}
        # Computed results
        self._stored_results = {}
        self.infer_lazy_vals_hack = True
        self._is_eager = is_eager
        self._verbose = verbose
        self.reprkw = dict(is_eager=False, nl=False)
        self._mutable = mutable
        if reprkw is not None:
            self.reprkw.update(**reprkw)
        if other is not None:
            self.update(other)
        if len(kwargs) > 0:
            self.update(kwargs)

    # --- direct interface

    def set_lazy_func(self, key, func):
        """
        Register ``func`` as the lazy evaluator for ``key``.

        Raises:
            ValueError: if the dict is not mutable and ``key`` already has a
                registered lazy function
        """
        assert util_type.is_funclike(func), 'func must be a callable'
        # Direct dict membership avoids copying the key set on every call
        if not self._mutable and key in self._eval_funcs:
            raise ValueError(('Cannot overwrite lazy function for key=%r') % (key,))
        self._eval_funcs[key] = func

    def setitem(self, key, value):
        """
        Store ``value`` under ``key``.

        While ``infer_lazy_vals_hack`` is on, function-like values are
        registered as lazy evaluators instead of being stored.

        Raises:
            ValueError: if the dict is not mutable and ``key`` already has a
                registered lazy function
        """
        # HACK: ideally lazy funcs would always be registered explicitly and
        # this method would only ever store a plain value
        if not self._mutable and key in self._eval_funcs:
            raise ValueError(('Cannot overwrite lazy function for key=%r') % (key,))
        if self.infer_lazy_vals_hack and util_type.is_funclike(value):
            self.set_lazy_func(key, value)
        else:
            self._stored_results[key] = value

    def getitem(self, key, is_eager=None):
        """
        Look up ``key`` eagerly or lazily.

        ``is_eager=None`` falls back to the mode chosen at construction.
        """
        if is_eager is None:
            is_eager = self._is_eager
        if is_eager:
            return self.eager_eval(key)
        else:
            return self.lazy_eval(key)

    def nocache_eval(self, key):
        """forces function evaluation"""
        func_ = self._eval_funcs[key]
        value = func_()
        return value

    def eager_eval(self, key):
        """Return the stored value, evaluating and storing it if needed."""
        if key in self._stored_results:
            value = self._stored_results[key]
        else:
            if self._verbose:
                print('[util_cache] Evaluating key=%r' % (key,))
            value = self.nocache_eval(key)
            self._stored_results[key] = value
        return value

    def lazy_eval(self, key):
        """Return the stored value if present, else the unevaluated function."""
        if key in self._stored_results:
            value = self._stored_results[key]
        else:
            value = self._eval_funcs[key]
        return value

    def clear_evaluated(self):
        """Drop stored results for keys that have a backing lazy function."""
        for key in self.evaluated_keys():
            del self._stored_results[key]

    def clear_stored(self, keys=None):
        """Drop stored results for ``keys`` (all stored keys when None)."""
        if keys is None:
            # Copy first: the dict is mutated while looping
            keys = list(self.stored_keys())
        for key in keys:
            del self._stored_results[key]

    def stored_keys(self):
        """keys whose vals that have been explicitly set or evaluated"""
        return self._stored_results.keys()

    def reconstructable_keys(self):
        """only keys whose vals that have been set with a backup func"""
        return set(self._eval_funcs.keys())

    def all_keys(self):
        """every key that is either stored or has a backup func"""
        return set(self.stored_keys()).union(self.reconstructable_keys())

    def unevaluated_keys(self):
        """keys whose vals can be constructed but have not been"""
        return self.reconstructable_keys().difference(self.stored_keys())

    def evaluated_keys(self):
        """only keys whose vals have been evaluated from a stored function"""
        return self.reconstructable_keys().intersection(self.stored_keys())

    def nonreconstructable_keys(self):
        """only keys whose vals that have been explicitly set without a backup func"""
        return self.all_keys() - self.reconstructable_keys()

    def cached_keys(self):
        """keys whose vals are currently held: explicitly set or already evaluated"""
        return self.nonreconstructable_keys().union(self.evaluated_keys())

    def printinfo(self):
        print('nonreconstructable_keys = %s' % (self.nonreconstructable_keys(),))
        print('reconstructable_keys = %s' % (self.reconstructable_keys(),))
        print('evaluated_keys = %s' % (self.evaluated_keys(),))
        print('unevaluated_keys = %s' % (self.unevaluated_keys(),))

    def asdict(self, is_eager=None):
        """Return a plain dict built by calling ``getitem`` on every key."""
        dict_ = {key: self.getitem(key, is_eager) for key in self.keys()}
        return dict_

    def tostring(self, is_eager=None, keys=None, **kwargs):
        """
        Render the dict as a string via ``ut.repr2``.

        Values of keys backed by a lazy function are replaced by a marker:
        ``!`` when the key has been evaluated and ``z`` when it has not.

        Args:
            is_eager (bool): access mode used to collect the values; None
                uses the mode chosen at construction
            keys (list): if given, restrict the output to these keys
            **kwargs: forwarded to ``ut.repr2``
        """
        import utool as ut
        dict_ = self.asdict(is_eager=is_eager)

        class AwakeFaceRepr(object):
            def __repr__(self):
                return '!'
                # return '(o.o)'
                # return "٩(ˊᗜˋ*)و"

        class SleepFaceRepr(object):
            def __repr__(self):
                return 'z'
                # return '(-_-)'
                # return '(ᵕ≀ᵕ)'

        for key in self.evaluated_keys():
            # dict_[key] = '!'
            dict_[key] = AwakeFaceRepr()
        for key in self.unevaluated_keys():
            # dict_[key] = 'z'
            dict_[key] = SleepFaceRepr()
        if keys is not None:
            dict_ = ut.dict_subset(dict_, keys)
        return ut.repr2(dict_, **kwargs)

    # --- dict interface

    def get(self, key, *d):
        """
        Return the value for ``key`` using the default access mode.

        At most one positional default may be given; it is returned when
        ``key`` is absent.  Without a default a missing key raises KeyError.
        """
        if len(d) > 1:
            raise ValueError('can only specify one default')
        elif len(d) == 1:
            if key not in self:
                return d[0]
        return self.getitem(key, self._is_eager)

    def update(self, dict_, **kwargs):
        for key, val in six.iteritems(dict_):
            self[key] = val
        for key, val in six.iteritems(kwargs):
            self[key] = val

    def keys(self):
        return self.all_keys()

    def values(self):
        return [self[key] for key in self.keys()]

    def items(self):
        return [(key, self[key]) for key in self.keys()]

    def __contains__(self, key):
        # Without this, ``in`` falls back to iterating a freshly built key set
        return key in self._stored_results or key in self._eval_funcs

    def __setitem__(self, key, value):
        self.setitem(key, value)

    def __getitem__(self, key):
        return self.get(key)

    def __delitem__(self, key):
        if key not in self:
            raise KeyError(key)
        # A key may live in either or both of the backing dicts
        self._eval_funcs.pop(key, None)
        self._stored_results.pop(key, None)

    def __iter__(self):
        return iter(self.keys())

    def __len__(self):
        return len(self.keys())

    def __str__(self):
        return self.tostring()

    def __repr__(self):
        return self.tostring(**self.reprkw)

    # def __getstate__(self):
    #     state_dict = self.asdict()
    #     return state_dict
    # def __setstate__(self, state_dict):
    #     self._stored_results.update(state_dict)
@six.add_metaclass(util_class.ReloadingMetaclass)
class LazyList(object):
    """
    very hacky list implemented as a dictionary

    Items are stored in a ``LazyDict`` keyed by their integer position, so
    function-like items are handled by that dict's lazy-value rules.

    Args:
        **kwargs: forwarded unchanged to the ``LazyDict`` constructor
    """

    def __init__(self, **kwargs):
        self._hackstore = LazyDict(**kwargs)

    def __len__(self):
        return len(self._hackstore)

    def __getitem__(self, index):
        """
        Return the item at ``index``.

        Raises:
            ValueError: if the backing dict raises KeyError for ``index``
        """
        try:
            return self._hackstore[index]
        except KeyError:
            # raise ValueError('index=%r out of bounds' % (index,))
            raise ValueError(
                'index=%r out of bounds or error computing lazy value.' % (index,)
            )

    def append(self, item):
        # The next free position is the current length
        self._hackstore[len(self._hackstore)] = item

    def tolist(self):
        """Return the items as a plain list in index order."""
        # The backing dict exposes its keys as a set, whose iteration order
        # is not guaranteed; walk the indices instead, as ``__iter__`` does.
        return list(self)

    def __iter__(self):
        for index in range(len(self)):
            yield self[index]
if __name__ == '__main__':
    # CommandLine:
    #     python -c "import utool, utool.util_cache; utool.doctest_funcs(utool.util_cache)"
    #     python -m utool.util_cache
    #     python -m utool.util_cache --allexamples
    from multiprocessing import freeze_support
    freeze_support()  # for win32
    import utool as ut  # NOQA
    ut.doctest_funcs()