# -*- coding: utf-8 -*-
"""
Module to execute the same function with different arguments in parallel.
"""
from __future__ import absolute_import, division, print_function
import multiprocessing
from concurrent import futures
# import atexit
# import sys
import signal
import ctypes
import six
import threading
from six.moves import map, range, zip # NOQA
from utool._internal.meta_util_six import get_funcname
from utool import util_progress
from utool import util_arg
from utool import util_inject
from utool import util_cplat
if six.PY2:
# import thread as _thread
import Queue as queue
elif six.PY3:
# import _thread
import queue
util_inject.noinject('[parallel]')
SILENT = util_arg.SILENT
if SILENT:
def print(msg):
pass
# Default number of cores to use when doing parallel processing
__NUM_PROCS__ = util_arg.get_argval(('--nprocs', '--num-procs'), type_=int, default=None)
# If True, parallelism is disabled
__FORCE_SERIAL__ = util_arg.get_argflag(
('--utool-force-serial', '--force-serial', '--serial')
)
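# Example invocations (hypothetical script; the flags are parsed by util_arg):
#   python some_script.py --nprocs=4       # use 4 worker processes
#   python some_script.py --force-serial   # disable parallelism entirely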
# FIXME: running tests in IBEIS has errors when this number is low,
# possibly due to the large number of parallel processes running?
__MIN_PARALLEL_TASKS__ = 4
if util_cplat.WIN32:
__MIN_PARALLEL_TASKS__ = 16
def generate2(
func,
args_gen,
kw_gen=None,
ntasks=None,
ordered=True,
force_serial=False,
use_pool=False,
chunksize=None,
nprocs=None,
progkw={},
nTasks=None,
verbose=None,
futures_threaded=True,
timeout=3600,
):
r"""
Interfaces to either multiprocessing or futures.
Esentially maps ``args_gen`` onto ``func`` using pool.imap.
However, args_gen must be a tuple of args that will be unpacked and send to
the function. Thus, the function can take multiple args. Also specifing
keyword args is supported.
Useful for embarrassingly parallel loops. Currently does not work with
opencv3
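The examples below never pass real keyword arguments, so here is a
minimal hedged sketch of ``kw_gen`` usage (the ``add`` helper is
hypothetical):
Example:
>>> # DISABLE_DOCTEST
>>> import utool as ut
>>> def add(x, y=0):
>>>     return x + y
>>> args_gen = [(i,) for i in range(8)]
>>> kw_gen = [{'y': 10}] * len(args_gen)  # or a single dict for all tasks
>>> results = list(ut.generate2(add, args_gen, kw_gen, force_serial=True))
>>> assert results == [i + 10 for i in range(8)]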
CommandLine:
python -m utool.util_parallel generate2
Args:
func (function): live python function
args_gen (iterable): iterable of argument tuples, one per task
kw_gen (None): (default = None)
ntasks (None): (default = None)
ordered (bool): (default = True)
force_serial (bool): (default = False)
verbose (bool): verbosity flag (default = None)
Example:
>>> # DISABLE_DOCTEST
>>> from utool.util_parallel import * # NOQA
>>> from utool.util_parallel import _kw_wrap_worker # NOQA
>>> import utool as ut
>>> args_gen = list(zip(range(10000)))
>>> kw_gen = [{}] * len(args_gen)
>>> func = ut.is_prime
>>> _ = list(generate2(func, args_gen))
>>> _ = list(generate2(func, args_gen, ordered=False))
>>> _ = list(generate2(func, args_gen, force_serial=True))
>>> _ = list(generate2(func, args_gen, use_pool=True))
>>> _ = list(generate2(func, args_gen, futures_threaded=True))
>>> _ = list(generate2(func, args_gen, ordered=False, verbose=False))
Example:
>>> # ENABLE_DOCTEST
>>> import utool as ut
>>> #num = 8700 # parallel is slower for smaller numbers
>>> num = 500 # parallel has an initial (~.1 second) startup overhead
>>> print('TESTING SERIAL')
>>> func = ut.is_prime
>>> args_list = list(range(0, num))
>>> flag_generator0 = ut.generate2(ut.is_prime, zip(range(0, num)), force_serial=True)
>>> flag_list0 = list(flag_generator0)
>>> print('TESTING PARALLEL (PROCESS)')
>>> flag_generator1 = ut.generate2(ut.is_prime, zip(range(0, num)))
>>> flag_list1 = list(flag_generator1)
>>> print('TESTING PARALLEL (THREAD)')
>>> flag_generator2 = ut.generate2(ut.is_prime, zip(range(0, num)), futures_threaded=True)
>>> flag_list2 = list(flag_generator2)
>>> print('ASSERTING')
>>> assert len(flag_list1) == num
>>> assert len(flag_list2) == num
>>> assert flag_list0 == flag_list1
>>> assert flag_list0 == flag_list2
Example:
>>> # ENABLE_DOCTEST
>>> # Trying to recreate the freeze seen in IBEIS
>>> import utool as ut
>>> print('TESTING SERIAL')
>>> flag_generator0 = ut.generate2(ut.is_prime, zip(range(0, 1)))
>>> flag_list0 = list(flag_generator0)
>>> flag_generator1 = ut.generate2(ut.fibonacci_recursive, zip(range(0, 1)))
>>> flag_list1 = list(flag_generator1)
>>> print('TESTING PARALLEL')
>>> flag_generator2 = ut.generate2(ut.is_prime, zip(range(0, 12)))
>>> flag_list2 = list(flag_generator2)
>>> flag_generator3 = ut.generate2(ut.fibonacci_recursive, zip(range(0, 12)))
>>> flag_list3 = list(flag_generator3)
>>> print('flag_list0 = %r' % (flag_list0,))
>>> print('flag_list1 = %r' % (flag_list1,))
>>> print('flag_list2 = %r' % (flag_list2,))
>>> print('flag_list3 = %r' % (flag_list3,))
Example:
>>> # DISABLE_DOCTEST
>>> # UNSTABLE_DOCTEST
>>> # Trying to recreate the freeze seen in IBEIS
>>> import vtool as vt
>>> import utool as ut
>>> from wbia.algo.preproc.preproc_chip import gen_chip
>>> #from wbia.algo.preproc.preproc_feat import gen_feat_worker
>>> key_list = ['grace.jpg', 'easy1.png', 'ada2.jpg', 'easy3.png',
>>> 'hard3.png', 'zebra.png', 'patsy.jpg', 'ada.jpg',
>>> 'carl.jpg', 'lena.png', 'easy2.png']
>>> img_fpath_list = [ut.grab_test_imgpath(key) for key in key_list]
>>> arg_list1 = [(ut.augpath(img_fpath, '_testgen'), img_fpath, (0, 0, 100, 100), 0.0, (545, 372), []) for img_fpath in img_fpath_list[0:1]]
>>> arg_list2 = [(ut.augpath(img_fpath, '_testgen'), img_fpath, (0, 0, 100, 100), 0.0, (545, 372), []) for img_fpath in img_fpath_list]
>>> #arg_list3 = [(count, fpath, {}) for count, fpath in enumerate(ut.get_list_column(arg_list1, 0))]
>>> #arg_list4 = [(count, fpath, {}) for count, fpath in enumerate(ut.get_list_column(arg_list2, 0))]
>>> ut.remove_file_list(ut.get_list_column(arg_list2, 0))
>>> chips1 = [x for x in ut.generate2(gen_chip, arg_list1)]
>>> chips2 = [y for y in ut.generate2(gen_chip, arg_list2, force_serial=True)]
>>> #feats3 = [z for z in ut.generate2(gen_feat_worker, arg_list3)]
>>> #feats4 = [w for w in ut.generate2(gen_feat_worker, arg_list4)]
Example:
>>> # DISABLE_DOCTEST
>>> # FAILING_DOCTEST
>>> # Trying to recreate the freeze seen in IBEIS
>>> # Extremely weird case: freezes only if dsize > (313, 313) AND __testwarp was called beforehand.
>>> # otherwise the parallel loop works fine. Could be an opencv 3.0.0-dev issue.
>>> import vtool as vt
>>> import utool as ut
>>> from wbia.algo.preproc.preproc_chip import gen_chip
>>> import cv2
>>> from utool.util_parallel import __testwarp
>>> key_list = ['grace.jpg', 'easy1.png', 'ada2.jpg', 'easy3.png',
>>> 'hard3.png', 'zebra.png', 'patsy.jpg', 'ada.jpg',
>>> 'carl.jpg', 'lena.png', 'easy2.png']
>>> img_fpath_list = [ut.grab_test_imgpath(key) for key in key_list]
>>> arg_list1 = [(vt.imread(fpath),) for fpath in img_fpath_list[0:1]]
>>> arg_list2 = [(vt.imread(fpath),) for fpath in img_fpath_list]
>>> #new1 = [x for x in ut.generate2(__testwarp, arg_list1)]
>>> new1 = __testwarp(arg_list1[0])
>>> new2 = [y for y in ut.generate2(__testwarp, arg_list2, force_serial=False)]
>>> #print('new2 = %r' % (new2,))
#Example:
# >>> # Freakin weird. When IBEIS Runs generate it doesn't close the processes
# >>> # UNSTABLE_DOCTEST
# >>> # python -m utool.util_parallel --test-generate:4
# >>> # Trying to see if we can recreate the problem where there are
# >>> # defunct python processes
# >>> import utool as ut
# >>> #num = 8700 # parallel is slower for smaller numbers
# >>> num = 70000 # parallel has an initial (~.1 second startup overhead)
# >>> print('TESTING PARALLEL')
# >>> flag_generator1 = list(ut.generate2(ut.is_prime, range(0, num)))
# >>> flag_generator1 = list(ut.generate2(ut.is_prime, range(0, num)))
# >>> import time
# >>> time.sleep(10)
"""
if verbose is None:
verbose = 2
if ntasks is None:
ntasks = nTasks
if ntasks is None:
try:
ntasks = len(args_gen)
except TypeError:
# Cast to a list
args_gen = list(args_gen)
ntasks = len(args_gen)
if ntasks == 1 or ntasks < __MIN_PARALLEL_TASKS__:
force_serial = True
if __FORCE_SERIAL__:
force_serial = __FORCE_SERIAL__
if ntasks == 0:
if verbose:
print('[ut.generate2] submitted 0 tasks')
return
if nprocs is None:
nprocs = min(ntasks, get_default_numprocs())
if nprocs == 1:
force_serial = True
if kw_gen is None:
kw_gen = [{}] * ntasks
if isinstance(kw_gen, dict):
# kw_gen can be a single dict applied to everything
kw_gen = [kw_gen] * ntasks
if force_serial:
for result in _generate_serial2(
func, args_gen, kw_gen, ntasks=ntasks, progkw=progkw, verbose=verbose
):
yield result
else:
if verbose:
gentype = 'mp' if use_pool else 'futures'
fmtstr = '[generate2] executing {} {} tasks using {} {} procs'
print(fmtstr.format(ntasks, get_funcname(func), nprocs, gentype))
if verbose > 1:
lbl = '(pargen) %s: ' % (get_funcname(func),)
progkw_ = dict(freq=None, bs=True, adjust=False, freq_est='absolute')
progkw_.update(progkw)
# print('progkw_.update = {!r}'.format(progkw_.update))
progpart = util_progress.ProgPartial(length=ntasks, lbl=lbl, **progkw_)
if use_pool:
# Use multiprocessing
if chunksize is None:
chunksize = max(min(4, ntasks), min(8, ntasks // (nprocs ** 2)))
try:
pool = multiprocessing.Pool(nprocs)
if ordered:
pmap_func = pool.imap
else:
pmap_func = pool.imap_unordered
wrapped_arg_gen = zip([func] * ntasks, args_gen, kw_gen)  # use ntasks; args_gen may have no len()
res_gen = pmap_func(_kw_wrap_worker, wrapped_arg_gen, chunksize)
if verbose > 1:
res_gen = progpart(res_gen)
for res in res_gen:
yield res
finally:
pool.close()
pool.join()
else:
if futures_threaded:
executor_cls = futures.ThreadPoolExecutor
else:
executor_cls = futures.ProcessPoolExecutor
# Use futures
executor = executor_cls(nprocs)
try:
fs_list = [
executor.submit(func, *a, **k) for a, k in zip(args_gen, kw_gen)
]
fs_gen = fs_list
if not ordered:
fs_gen = futures.as_completed(fs_gen)
if verbose > 1:
fs_gen = progpart(fs_gen)
for fs in fs_gen:
yield fs.result(timeout=timeout)
finally:
executor.shutdown(wait=True)
def _kw_wrap_worker(func_args_kw):
func, args, kw = func_args_kw
return func(*args, **kw)
def _generate_serial2(
func, args_gen, kw_gen=None, ntasks=None, progkw={}, verbose=None, nTasks=None
):
"""internal serial generator"""
if verbose is None:
verbose = 2
if ntasks is None:
ntasks = nTasks
if ntasks is None:
ntasks = len(args_gen)
if verbose > 0:
print(
'[ut._generate_serial2] executing %d %s tasks in serial'
% (ntasks, get_funcname(func))
)
# kw_gen can be a single dict applied to everything
if kw_gen is None:
kw_gen = [{}] * ntasks
if isinstance(kw_gen, dict):
kw_gen = [kw_gen] * ntasks
# Get iterator with or without progress
if verbose > 1:
lbl = '(sergen) %s: ' % (get_funcname(func),)
progkw_ = dict(freq=None, bs=True, adjust=False, freq_est='between')
progkw_.update(progkw)
args_gen = util_progress.ProgIter(args_gen, length=ntasks, lbl=lbl, **progkw_)
for args, kw in zip(args_gen, kw_gen):
result = func(*args, **kw)
yield result
def set_num_procs(num_procs):
global __NUM_PROCS__
__NUM_PROCS__ = num_procs
def in_main_process():
"""
Returns True if executing in the main process, False when inside a
multiprocessing subprocess.
Useful to disable init print messages on windows.
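A minimal hedged sketch of the guard pattern this enables (assuming
the usual injection into the ``ut`` namespace):
Example:
>>> # DISABLE_DOCTEST
>>> import utool as ut
>>> if ut.in_main_process():
>>>     print('printed once by the parent, not by worker subprocesses')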
"""
return multiprocessing.current_process().name == 'MainProcess'
def get_sys_thread_limit():
import utool as ut
if ut.LINUX:
out, err, ret = ut.cmd('ulimit', '-u', verbose=False, quiet=True, shell=True)
return out
else:
raise NotImplementedError('get_sys_thread_limit is only implemented on Linux')
def get_default_numprocs():
if __NUM_PROCS__ is not None:
return __NUM_PROCS__
# if WIN32:
# num_procs = 3 # default windows to 3 processes for now
# else:
# num_procs = max(multiprocessing.cpu_count() - 2, 1)
num_procs = max(multiprocessing.cpu_count() - 1, 1)
return num_procs
def init_worker():
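# Workers ignore SIGINT so that a KeyboardInterrupt is delivered only to
# the parent process, which can then shut the pool down cleanly.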
signal.signal(signal.SIGINT, signal.SIG_IGN)
def __testwarp(tup):
# THIS DOES NOT CAUSE A PROBLEM FOR SOME FREAKING REASON
import cv2
import numpy as np
import vtool as vt
img = tup[0]
M = vt.rotation_mat3x3(0.1)[0:2].dot(vt.translation_mat3x3(-10, 10))
# new = cv2.warpAffine(img, M[0:2], (500, 500), flags=cv2.INTER_LANCZOS4,
# borderMode=cv2.BORDER_CONSTANT)
# ONLY FAILS WHEN OUTPUT SIZE IS LARGE
# dsize = (314, 314) # (313, 313) does not cause the error
dsize = (500, 500) # (313, 313) does not cause the error
dst = np.empty(dsize[::-1], dtype=img.dtype)
# new = cv2.warpAffine(img, M[0:2], dsize)
print('Warping?')
new = cv2.warpAffine(img, M[0:2], dsize, dst)
print(dst is new)
return new
def _test_buffered_generator():
"""
Test for standard python calls
CommandLine:
python -m utool.util_parallel --test-_test_buffered_generator
Example:
>>> # DISABLE_DOCTEST
>>> import utool as ut
>>> from utool.util_parallel import * # NOQA
>>> from utool.util_parallel import _test_buffered_generator # NOQA
>>> _test_buffered_generator()
"""
import utool as ut
# ---- Func and Sleep Definitions
args = [346373] # 38873
func = ut.is_prime
def sleepfunc(prime=args[0]):
# time.sleep(.1)
import utool as ut
[ut.is_prime(prime) for _ in range(2)]
_test_buffered_generator_general(func, args, sleepfunc, 10.0)
def _test_buffered_generator2():
"""
CommandLine:
python -m utool.util_parallel --test-_test_buffered_generator2
It takes roughly time_thresh=15s (about 350 iterations) for buffered to
overtake serial.
Test for numpy calls
Example:
>>> # DISABLE_DOCTEST
>>> from utool.util_parallel import * # NOQA
>>> _test_buffered_generator2()
"""
import numpy as np
# import utool as ut
# ---- Func and Sleep Definitions
from functools import partial
rng = np.random.RandomState(0)
args = [rng.rand(256, 256) for _ in range(32)] # 38873
func = partial(np.divide, 4.3)
def sleepfunc(prime=346373):
# time.sleep(.1)
import utool as ut
[ut.is_prime(prime) for _ in range(2)]
_test_buffered_generator_general(func, args, sleepfunc, 15.0)
def _test_buffered_generator3():
"""
CommandLine:
python -m utool.util_parallel --test-_test_buffered_generator3
This test suggests that ut.buffered_generator is better for disk IO
than ut.generate.
Example:
>>> # DISABLE_DOCTEST
>>> from utool.util_parallel import * # NOQA
>>> _test_buffered_generator3()
"""
import vtool as vt
import utool as ut
# ---- Func and Sleep Definitions
args = list(map(ut.grab_test_imgpath, ut.get_valid_test_imgkeys()))
func = vt.imread
def sleepfunc(prime=346373):
# time.sleep(.1)
import utool as ut
[ut.is_prime(prime) for _ in range(2)]
_test_buffered_generator_general(func, args, sleepfunc, 4.0)
def _test_buffered_generator_general(
func,
args,
sleepfunc,
target_looptime=1.0,
serial_cheat=1,
argmode=False,
buffer_size=2,
):
"""
# We are going to generate output of func in the background while sleep
# func is running in the foreground
# --- Hyperparams
target_looptime = 1.5 # maximum time to run all loops
"""
import utool as ut
# serial_cheat = 1 # approx division factor to run serial less times
show_serial = True # target_looptime < 10. # 3.0
with ut.Timer('One* call to func') as t_fgfunc:
results = [func(arg) for arg in args]
functime = t_fgfunc.ellapsed / len(args)
# sleepfunc = ut.is_prime
with ut.Timer('One* call to sleep func') as t_sleep:
if argmode:
[sleepfunc(x) for x in results]
else:
[sleepfunc() for x in results]
sleeptime = t_sleep.ellapsed / len(args)
# compute amount of loops to run
_num_loops = round(target_looptime // (functime + sleeptime))
num_data = int(_num_loops // len(args))
num_loops = int(num_data * len(args))
serial_cheat = min(serial_cheat, num_data)
data = ut.flatten([args] * num_data)
est_tsleep = sleeptime * num_loops
est_tfunc = functime * num_loops
est_needed_buffers = sleeptime / functime
print(
'Estimated stats'
+ ut.repr4(
ut.dict_subset(
locals(),
[
'num_loops',
'functime',
'sleeptime',
'est_tsleep',
'est_tfunc',
'serial_cheat',
'buffer_size',
'est_needed_buffers',
],
)
)
)
if show_serial:
with ut.Timer('serial') as t1:
# cheat for serial to make it go faster
for x in map(func, data[: len(data) // serial_cheat]):
if argmode:
sleepfunc(x)
else:
sleepfunc()
t_serial = serial_cheat * t1.ellapsed
print("...toc('adjusted_serial') = %r" % (t_serial))
with ut.Timer('ut.buffered_generator') as t2:
gen_ = ut.buffered_generator(map(func, data), buffer_size=buffer_size)
for x in gen_:
if argmode:
sleepfunc(x)
else:
sleepfunc()
with ut.Timer('ut.generate') as t3:
gen_ = ut.generate2(func, zip(data), chunksize=buffer_size, verbose=0)
for x in gen_:
if argmode:
sleepfunc(x)
else:
sleepfunc()
# Compare theoretical vs practical efficiency
print('\n Theoretical Results')
def parallel_efficiency(ellapsed, est_tsleep, est_tfunc):
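# Percentage of the estimated function time hidden by overlapping it
# with the sleep time: 100% when ellapsed == est_tsleep (full overlap),
# 0% when ellapsed == est_tsleep + est_tfunc (no overlap).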
return (1 - ((ellapsed - est_tsleep) / est_tfunc)) * 100
if show_serial:
print(
'Theoretical gain (serial) = %.3f%%'
% (parallel_efficiency(t_serial, est_tsleep, est_tfunc),)
)
print(
'Theoretical gain (ut.buffered_generator) = %.3f%%'
% (parallel_efficiency(t2.ellapsed, est_tsleep, est_tfunc),)
)
print(
'Theoretical gain (ut.generate) = %.2f%%'
% (parallel_efficiency(t3.ellapsed, est_tsleep, est_tfunc),)
)
if show_serial:
prac_tfunc = t_serial - est_tsleep
print('\n Practical Results')
print(
'Practical gain (serial) = %.3f%%'
% (parallel_efficiency(t1.ellapsed, est_tsleep, prac_tfunc),)
)
print(
'Practical gain (ut.buffered_generator) = %.3f%%'
% (parallel_efficiency(t2.ellapsed, est_tsleep, prac_tfunc),)
)
print(
'Practical gain (ut.generate) = %.2f%%'
% (parallel_efficiency(t3.ellapsed, est_tsleep, prac_tfunc),)
)
def _test_buffered_generator_general2(
bgfunc,
bgargs,
fgfunc,
target_looptime=1.0,
serial_cheat=1,
buffer_size=2,
show_serial=True,
):
"""
# We generate the output of bgfunc in the background while fgfunc runs
# in the foreground. fgfunc takes the results of bgfunc as args.
# --- Hyperparams
target_looptime = 1.5 # maximum time to run all loops
"""
import utool as ut
with ut.Timer('One* call to bgfunc') as t_bgfunc:
results = [bgfunc(arg) for arg in bgargs]
bgfunctime = t_bgfunc.ellapsed / len(bgargs)
# fgfunc = ut.is_prime
with ut.Timer('One* call to fgfunc') as t_fgfunc:
[fgfunc(x) for x in results]
fgfunctime = t_fgfunc.ellapsed / len(bgargs)
# compute amount of loops to run
est_looptime = bgfunctime + fgfunctime
_num_loops = round(target_looptime // est_looptime)
num_data = int(_num_loops // len(bgargs))
num_loops = int(num_data * len(bgargs))
serial_cheat = min(serial_cheat, num_data)
data = ut.flatten([bgargs] * num_data)
est_tfg = fgfunctime * num_loops
est_tbg = bgfunctime * num_loops
est_needed_buffers = fgfunctime / bgfunctime
print(
'Estimated stats'
+ ut.repr4(
ut.dict_subset(
locals(),
[
'num_loops',
'bgfunctime',
'fgfunctime',
'est_tfg',
'est_tbg',
'serial_cheat',
'buffer_size',
'est_needed_buffers',
],
)
)
)
if show_serial:
with ut.Timer('serial') as t1:
# cheat for serial to make it go faster
for x in map(bgfunc, data[: len(data) // serial_cheat]):
fgfunc(x)
t_serial = serial_cheat * t1.ellapsed
print("...toc('adjusted_serial') = %r" % (t_serial))
with ut.Timer('ut.buffered_generator') as t2:
gen_ = ut.buffered_generator(map(bgfunc, data), buffer_size=buffer_size)
for x in gen_:
fgfunc(x)
with ut.Timer('ut.generate') as t3:
gen_ = ut.generate2(bgfunc, zip(data), chunksize=buffer_size, verbose=0)
for x in gen_:
fgfunc(x)
# Compare theoretical vs practical efficiency
print('\n Theoretical Results')
def parallel_efficiency(ellapsed, est_tfg, est_tbg):
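# Percentage of the estimated background time hidden by overlapping it
# with the foreground time: 100% means full overlap, 0% means none.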
return (1 - ((ellapsed - est_tfg) / est_tbg)) * 100
if show_serial:
print(
'Theoretical gain (serial) = %.3f%%'
% (parallel_efficiency(t_serial, est_tfg, est_tbg),)
)
print(
'Theoretical gain (ut.buffered_generator) = %.3f%%'
% (parallel_efficiency(t2.ellapsed, est_tfg, est_tbg),)
)
print(
'Theoretical gain (ut.generate) = %.2f%%'
% (parallel_efficiency(t3.ellapsed, est_tfg, est_tbg),)
)
if show_serial:
prac_tbg = t_serial - est_tfg
print('\n Practical Results')
print(
'Practical gain (serial) = %.3f%%'
% (parallel_efficiency(t1.ellapsed, est_tfg, prac_tbg),)
)
print(
'Practical gain (ut.buffered_generator) = %.3f%%'
% (parallel_efficiency(t2.ellapsed, est_tfg, prac_tbg),)
)
print(
'Practical gain (ut.generate) = %.2f%%'
% (parallel_efficiency(t3.ellapsed, est_tfg, prac_tbg),)
)
def bgfunc(path):
# Test helper for _test_buffered_generator_img
# import utool as ut
import vtool as vt
for _ in range(1):
img = vt.imread(path)
img = img ** 1.1
# [ut.is_prime(346373) for _ in range(2)]
return img
def _test_buffered_generator_img():
"""
Test for buffering image read calls
CONCLUSIONS:
Use buffering when bgtime is bigger than, but comparable to, fgtime.
Use buffering when fgtime < bgtime and (fgtime + bgtime) is large.
Use generate when fgtime > bgtime and (fgtime + bgtime) is large.
Use serial when fgtime is bigger and all parts are comparatively small.
Buffer size should be roughly bgtime / fgtime.
Buffering also has a much more even and regular cpu demand, and it
demands fewer cpus (I think).
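For instance (hedged, illustrative numbers only): if reading an image
takes ~40ms (bgtime) and processing it takes ~10ms (fgtime), then
bgtime / fgtime suggests buffer_size ~= 4.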
CommandLine:
python -m utool.util_parallel --test-_test_buffered_generator_img
Example:
>>> # DISABLE_DOCTEST
>>> import utool as ut
>>> from utool.util_parallel import * # NOQA
>>> from utool.util_parallel import _test_buffered_generator_img # NOQA
>>> from utool.util_parallel import _test_buffered_generator_general2 # NOQA
>>> _test_buffered_generator_img()
"""
import utool as ut
args = [
ut.grab_test_imgpath(key) for key in ut.util_grabdata.get_valid_test_imgkeys()
]
# import cv2
# func = cv2.imread
# bffunc = vt.imread
def sleepfunc_bufwin(x, niters=10):
# import cv2
for z in range(niters):
# operate on image in some capacity
x.cumsum()
for z in range(2):
x ** 1.1
return x
target_looptime = 60.0
# target_looptime = 20.0
# target_looptime = 10.0
# target_looptime = 5.0
serial_cheat = 1
_test_buffered_generator_general2(
bgfunc,
args,
sleepfunc_bufwin,
target_looptime,
serial_cheat,
buffer_size=4,
show_serial=False,
)
# _test_buffered_generator_general2(bgfunc, args, sleepfunc_bufwin, target_looptime, serial_cheat, buffer_size=4, show_serial=True)
def buffered_generator(source_gen, buffer_size=2, use_multiprocessing=False):
r"""
Generator that runs a slow source generator in a separate process.
My generate function still seems faster on test cases.
However, this function is more flexible in its compatibility.
Args:
source_gen (iterable): slow generator
buffer_size (int): the maximal number of items to pre-generate
(length of the buffer) (default = 2)
use_multiprocessing (bool): if False, uses GIL-hindered threading
instead of multiprocessing (default = False).
Note:
use_multiprocessing = True seems to freeze if passed a generator
built by six.moves.map.
References:
Taken from Sander Dieleman's data augmentation pipeline
https://github.com/benanne/kaggle-ndsb/blob/11a66cdbddee16c69514b9530a727df0ac6e136f/buffering.py
CommandLine:
python -m utool.util_parallel --test-buffered_generator:0
python -m utool.util_parallel --test-buffered_generator:1
Ignore:
>>> #functime = timeit.timeit(
>>> # 'ut.is_prime(' + str(prime) + ')', setup='import utool as ut',
>>> # number=500) / 1000.0
Example:
>>> # DISABLE_DOCTEST
>>> # UNSTABLE_DOCTEST
>>> from utool.util_parallel import * # NOQA
>>> import utool as ut
>>> num = 2 ** 14
>>> func = ut.is_prime
>>> data = [38873] * num
>>> data = list(range(num))
>>> with ut.Timer('serial') as t1:
... result1 = list(map(func, data))
>>> with ut.Timer('ut.generate2') as t3:
... result3 = list(ut.generate2(func, zip(data), chunksize=2, verbose=0))
>>> with ut.Timer('ut.buffered_generator') as t2:
... result2 = list(ut.buffered_generator(map(func, data)))
>>> assert len(result1) == num and len(result2) == num and len(result3) == num
>>> assert result3 == result2, 'inconsistent results'
>>> assert result1 == result2, 'inconsistent results'
Example:
>>> # DISABLE_DOCTEST
>>> # VERYSLLOOWWW_DOCTEST
>>> from utool.util_parallel import _test_buffered_generator2
>>> _test_buffered_generator2()
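A minimal hedged usage sketch (the slow ``load`` function is
hypothetical; ``buffered_generator`` itself is the real API):
Example:
>>> # DISABLE_DOCTEST
>>> import time
>>> import utool as ut
>>> def load(i):
>>>     time.sleep(.01)  # stand-in for slow IO
>>>     return i * 2
>>> # items are precomputed in a background thread while we consume them
>>> results = list(ut.buffered_generator(map(load, range(10)), buffer_size=3))
>>> assert results == [i * 2 for i in range(10)]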
"""
if buffer_size < 2:
raise RuntimeError('Minimal buffer_size is 2!')
if use_multiprocessing:
print('WARNING seems to freeze if passed in a generator')
# assert False, 'dont use this buffered multiprocessing'
if False:
pool = multiprocessing.Pool(
processes=get_default_numprocs(),
initializer=init_worker,
maxtasksperchild=None,
)
Process = pool.Process
else:
Process = multiprocessing.Process
_Queue = multiprocessing.Queue
target = _buffered_generation_process
else:
_Queue = queue.Queue
Process = KillableThread
target = _buffered_generation_thread
# the effective buffer_ size is one less, because the generation process
# will generate one extra element and block until there is room in the
# buffer_.
buffer_ = _Queue(maxsize=buffer_size - 1)
# Previously None was used as a sentinel, which fails when source_gen
# generates None. We need an object that the generator will never
# produce; a reasonable hack is to use the StopIteration type itself.
sentinal = StopIteration
process = Process(target=target, args=(iter(source_gen), buffer_, sentinal))
# if not use_multiprocessing:
process.daemon = True
process.start()
while True:
# output = buffer_.get(timeout=1.0)
output = buffer_.get()
if output is sentinal:  # identity check: the worker puts the sentinel class itself
return
yield output
# _iter = iter(buffer_.get, sentinal)
# for data in _iter:
# if debug:
# print('Yielding')
# yield data
def _buffered_generation_thread(source_gen, buffer_, sentinal):
"""helper for buffered_generator"""
for data in source_gen:
buffer_.put(data, block=True)
# sentinel: signal the end of the iterator
buffer_.put(sentinal)
def _buffered_generation_process(source_gen, buffer_, sentinal):
"""helper for buffered_generator"""
for data in source_gen:
buffer_.put(data, block=True)
# sentinel: signal the end of the iterator
buffer_.put(sentinal)
# unfortunately this does not suffice as a signal: if buffer_.get() was
# called and subsequently the buffer_ is closed, it will block forever.
buffer_.close()
def spawn_background_process(func, *args, **kwargs):
"""
Run a function in the background
(like rebuilding some costly data structure)
References:
http://stackoverflow.com/questions/2046603/is-it-possible-to-run-function-in-a-subprocess-without-threading-or-writing-a-se
http://stackoverflow.com/questions/1196074/starting-a-background-process-in-python
http://stackoverflow.com/questions/15063963/python-is-thread-still-running
Args:
func (function):
CommandLine:
python -m utool.util_parallel --test-spawn_background_process
Example:
>>> # DISABLE_DOCTEST
>>> # SLOW_DOCTEST
>>> from utool.util_parallel import * # NOQA
>>> import utool as ut
>>> import time
>>> from os.path import join
>>> # build test data
>>> fname = 'test_bgfunc_output.txt'
>>> dpath = ut.get_app_resource_dir('utool')
>>> ut.ensuredir(dpath)
>>> fpath = join(dpath, fname)
>>> # ensure file is not around
>>> sleep_time = 1
>>> ut.delete(fpath)
>>> assert not ut.checkpath(fpath, verbose=True)
>>> def background_func(fpath, sleep_time):
... import utool as ut
... import time
... print('[BG] Background Process has started')
... time.sleep(sleep_time)
... print('[BG] Background Process is writing')
... ut.write_to(fpath, 'background process')
... print('[BG] Background Process has finished')
... #raise AssertionError('test exception')
>>> # execute function
>>> func = background_func
>>> args = (fpath, sleep_time)
>>> kwargs = {}
>>> print('[FG] Spawning process')
>>> threadid = ut.spawn_background_process(func, *args, **kwargs)
>>> assert threadid.is_alive() is True, 'thread should be active'
>>> print('[FG] Spawned process. threadid=%r' % (threadid,))
>>> # background process should not have finished yet
>>> assert not ut.checkpath(fpath, verbose=True)
>>> print('[FG] Waiting to check')
>>> time.sleep(sleep_time + .1)
>>> print('[FG] Finished waiting')
>>> # Now the file should be there
>>> assert ut.checkpath(fpath, verbose=True)
>>> assert threadid.is_alive() is False, 'process should have died'
"""
import utool as ut
func_name = ut.get_funcname(func)
name = 'mp.Progress-' + func_name
# proc_obj = multiprocessing.Process(target=func, name=name, args=args, kwargs=kwargs)
proc_obj = KillableProcess(target=func, name=name, args=args, kwargs=kwargs)
# proc_obj.daemon = True
# proc_obj.isAlive = proc_obj.is_alive
proc_obj.start()
return proc_obj
class KillableProcess(multiprocessing.Process):
"""
Simple subclass of multiprocessing.Process
Gives an additional method to kill all children
as well as itself. calls this function on delete.
DEPRICATE, do not kill processes. It is not a good idea.
It can cause deadlocks.
"""
# def __del__(self):
# self.terminate2()
# super(KillableProcess, self).__del__()
def terminate2(self):
if self.is_alive():
# print('[terminate2] Killing process')
# Kill all children
import psutil
os_proc = psutil.Process(pid=self.pid)
for child in os_proc.children():
child.terminate()
self.terminate()
else:
# print('[terminate2] Already dead')
pass
# def _process_error_wraper(queue, func, args, kwargs):
# pass
# def spawn_background_process2(func, *args, **kwargs):
# multiprocessing_queue
# import utool as ut
# func_name = ut.get_funcname(func)
# name = 'mp.Progress-' + func_name
# proc_obj = multiprocessing.Process(target=func, name=name, args=args, kwargs=kwargs)
# #proc_obj.isAlive = proc_obj.is_alive
# proc_obj.start()
def _async_raise(tid, excobj):
res = ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, ctypes.py_object(excobj))
if res == 0:
raise ValueError('nonexistent thread id')
elif res > 1:
# """if it returns a number greater than one, you're in trouble,
# and you should call it again with exc=NULL to revert the effect"""
ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, 0)
raise SystemError('PyThreadState_SetAsyncExc failed')
class KillableThread(threading.Thread):
"""
DEPRECATED: do not kill threads. It is not a good idea and can cause
deadlocks.
References:
http://code.activestate.com/recipes/496960-thread2-killable-threads/
http://tomerfiliba.com/recipes/Thread2/
"""
def raise_exc(self, excobj):
assert self.is_alive(), 'thread must be started'  # isAlive was removed in Python 3.9
for tid, tobj in threading._active.items():
if tobj is self:
_async_raise(tid, excobj)
return
# the thread was alive when we entered the loop, but was not found
# in the dict, hence it must have been already terminated. should we raise
# an exception here? silently ignore?
def terminate(self):
# must raise the SystemExit type, instead of a SystemExit() instance
# due to a bug in PyThreadState_SetAsyncExc
try:
self.raise_exc(SystemExit)
except ValueError:
pass
def spawn_background_thread(func, *args, **kwargs):
thread_obj = KillableThread(target=func, args=args, kwargs=kwargs)
thread_obj.start()
return thread_obj
def spawn_background_daemon_thread(func, *args, **kwargs):
thread_obj = KillableThread(target=func, args=args, kwargs=kwargs)
thread_obj.daemon = True
thread_obj.start()
return thread_obj
if __name__ == '__main__':
"""
Ignore:
timing things
python reset_dbs.py --time-generate
python reset_dbs.py --time-generate --force-serial
python reset_dbs.py --time-generate --preinit
CommandLine:
python -m utool.util_parallel
python -m utool.util_parallel --allexamples --testslow
coverage run -m utool.util_parallel --allexamples
coverage run -m utool.util_parallel --allexamples --testslow
coverage report -m utool/util_parallel.py
coverage html
"""
# import multiprocessing
multiprocessing.freeze_support() # for win32
import utool # NOQA
utool.doctest_funcs()