Module threadpool
[frames] | no frames]

Source Code for Module threadpool

  1  # -*- coding: UTF-8 -*- 
  2  """Easy to use object-oriented thread pool framework. 
  3   
  4  A thread pool is an object that maintains a pool of worker threads to perform 
  5  time consuming operations in parallel. It assigns jobs to the threads 
  6  by putting them in a work request queue, where they are picked up by the 
  7  next available thread. This then performs the requested operation in the 
  8  background and puts the results in another queue. 
  9   
 10  The thread pool object can then collect the results from all threads from 
 11  this queue as soon as they become available or after all threads have 
 12  finished their work. It's also possible to define callbacks to handle 
 13  each result as it comes in. 
 14   
 15  The basic concept and some code was taken from the book "Python in a Nutshell, 
 16  2nd edition" by Alex Martelli, O'Reilly 2006, ISBN 0-596-10046-9, from section 
 17  14.5 "Threaded Program Architecture". I wrapped the main program logic in the 
 18  ThreadPool class, added the WorkRequest class and the callback system and 
 19  tweaked the code here and there. Kudos also to Florent Aide for the exception 
 20  handling mechanism. 
 21   
 22  Basic usage:: 
 23   
 24      >>> pool = ThreadPool(poolsize) 
 25      >>> requests = makeRequests(some_callable, list_of_args, callback) 
 26      >>> [pool.putRequest(req) for req in requests] 
 27      >>> pool.wait() 
 28   
 29  See the end of the module code for a brief, annotated usage example. 
 30   
 31  Website : http://chrisarndt.de/projects/threadpool/ 
 32   
 33  """ 
 34  __docformat__ = "restructuredtext en" 
 35   
 36  __all__ = [ 
 37      'makeRequests', 
 38      'NoResultsPending', 
 39      'NoWorkersAvailable', 
 40      'ThreadPool', 
 41      'WorkRequest', 
 42      'WorkerThread' 
 43  ] 
 44   
 45  __author__ = "Christopher Arndt" 
 46  __version__ = '1.2.7' 
 47  __revision__ = "$Revision$" 
 48  __date__ = "$Date$" 
 49  __license__ = "MIT license" 
 50   
 51   
 52  # standard library modules 
 53  import sys 
 54  import threading 
 55  import traceback 
 56   
 57  try: 
 58      import Queue            # Python 2 
 59  except ImportError: 
 60      import queue as Queue   # Python 3 
 61   
 62   
 63  # exceptions 
class NoResultsPending(Exception):
    """All work requests have been processed."""
67
class NoWorkersAvailable(Exception):
    """No worker threads available to process remaining requests."""
71 72 73 # internal module helper functions
74 -def _handle_thread_exception(request, exc_info):
75 """Default exception handler callback function. 76 77 This just prints the exception info via ``traceback.print_exception``. 78 79 """ 80 traceback.print_exception(*exc_info)
81 82 83 # utility functions
def makeRequests(callable_, args_list, callback=None,
                 exc_callback=_handle_thread_exception):
    """Create several work requests for same callable with different arguments.

    Convenience function for creating several work requests for the same
    callable where each invocation of the callable receives different values
    for its arguments.

    ``args_list`` contains the parameters for each invocation of callable.
    Each item in ``args_list`` should be either a 2-item tuple of the list of
    positional arguments and a dictionary of keyword arguments or a single,
    non-tuple argument.

    See docstring for ``WorkRequest`` for info on ``callback`` and
    ``exc_callback``.

    Returns a list with one ``WorkRequest`` per item of ``args_list``, in
    the same order.

    """
    requests = []
    for item in args_list:
        if isinstance(item, tuple):
            # (positional_args, keyword_args) form
            args, kwds = item[0], item[1]
        else:
            # a single bare argument becomes the only positional argument
            args, kwds = [item], None
        requests.append(
            WorkRequest(callable_, args, kwds, callback=callback,
                        exc_callback=exc_callback)
        )
    return requests
114 115 116 # classes
class WorkerThread(threading.Thread):
    """Background thread connected to the requests/results queues.

    A worker thread sits in the background and picks up work requests from
    one queue and puts the results in another until it is dismissed.

    """

    def __init__(self, requests_queue, results_queue, poll_timeout=5, **kwds):
        """Set up thread in daemonic mode and start it immediately.

        ``requests_queue`` and ``results_queue`` are instances of
        ``Queue.Queue`` passed by the ``ThreadPool`` class when it creates a
        new worker thread.

        ``poll_timeout`` is the number of seconds the thread waits for a new
        request before it re-checks whether it has been dismissed.

        Any additional keyword arguments are passed on to
        ``threading.Thread.__init__``.

        """
        threading.Thread.__init__(self, **kwds)
        # the ``daemon`` attribute replaces the deprecated ``setDaemon()``
        self.daemon = True
        self._requests_queue = requests_queue
        self._results_queue = results_queue
        self._poll_timeout = poll_timeout
        self._dismissed = threading.Event()
        self.start()

    def run(self):
        """Repeatedly process the job queue until told to exit."""
        while True:
            if self._dismissed.is_set():
                # we are dismissed, break out of loop
                break
            # get next work request. If we don't get a new request from the
            # queue after self._poll_timeout seconds, we jump to the start of
            # the while loop again, to give the thread a chance to exit.
            try:
                request = self._requests_queue.get(True, self._poll_timeout)
            except Queue.Empty:
                continue
            if self._dismissed.is_set():
                # we are dismissed, put back request in queue and exit loop
                self._requests_queue.put(request)
                break
            try:
                result = request.callable(*request.args, **request.kwds)
                self._results_queue.put((request, result))
            except:  # noqa: E722
                # deliberately catch everything: whatever the callable
                # raised is handed over via the results queue instead of
                # terminating the worker thread
                request.exception = True
                self._results_queue.put((request, sys.exc_info()))

    def dismiss(self):
        """Set a flag to tell the thread to exit when done with current job.
        """
        self._dismissed.set()
170 171
class WorkRequest:
    """A request to execute a callable for putting in the request queue later.

    See the module function ``makeRequests`` for the common case
    where you want to build several ``WorkRequest`` objects for the same
    callable but with different arguments for each call.

    """

    def __init__(self, callable_, args=None, kwds=None, requestID=None,
                 callback=None, exc_callback=_handle_thread_exception):
        """Create a work request for a callable and attach callbacks.

        A work request consists of a callable to be executed by a
        worker thread, a list of positional arguments and a dictionary
        of keyword arguments.

        A ``callback`` function can be specified, that is called when the
        results of the request are picked up from the result queue. It must
        accept two anonymous arguments, the ``WorkRequest`` object and the
        results of the callable, in that order. If you want to pass additional
        information to the callback, just stick it on the request object.

        You can also give a custom callback for when an exception occurs with
        the ``exc_callback`` keyword parameter. It should also accept two
        anonymous arguments, the ``WorkRequest`` and a tuple with the exception
        details as returned by ``sys.exc_info()``. The default implementation
        of this callback just prints the exception info via
        ``traceback.print_exception``. If you want no exception handler
        callback, just pass in ``None``.

        ``requestID``, if given, must be hashable since it is used by
        ``ThreadPool`` object to store the results of that work request in a
        dictionary. It defaults to the return value of ``id(self)``. Note
        that a given ``requestID`` is stored as ``hash(requestID)``, not as
        the object itself.

        Raises ``TypeError`` if ``requestID`` is not hashable.

        """
        if requestID is None:
            self.requestID = id(self)
        else:
            try:
                self.requestID = hash(requestID)
            except TypeError:
                raise TypeError("requestID must be hashable.")
        # set to True by the worker thread when the callable raised
        self.exception = False
        self.callback = callback
        self.exc_callback = exc_callback
        self.callable = callable_
        # falsy values (None, empty containers) fall back to fresh containers
        self.args = args or []
        self.kwds = kwds or {}

    def __str__(self):
        """Return a short description with id, arguments and exception flag."""
        return "<WorkRequest id=%s args=%r kwargs=%r exception=%s>" % \
            (self.requestID, self.args, self.kwds, self.exception)
225
class ThreadPool:
    """A thread pool, distributing work requests and collecting results.

    See the module docstring for more information.

    """

    def __init__(self, num_workers, q_size=0, resq_size=0, poll_timeout=5):
        """Set up the thread pool and start num_workers worker threads.

        ``num_workers`` is the number of worker threads to start initially.

        If ``q_size > 0`` the size of the work *request queue* is limited and
        the thread pool blocks when the queue is full and it tries to put
        more work requests in it (see ``putRequest`` method), unless you also
        use a positive ``timeout`` value for ``putRequest``.

        If ``resq_size > 0`` the size of the *results queue* is limited and the
        worker threads will block when the queue is full and they try to put
        new results in it.

        ``poll_timeout`` is handed to ``createWorkers`` for the initial
        worker threads.

        .. warning::
            If you set both ``q_size`` and ``resq_size`` to ``!= 0`` there is
            the possibility of a deadlock, when the results queue is not pulled
            regularly and too many jobs are put in the work requests queue.
            To prevent this, always set ``timeout > 0`` when calling
            ``ThreadPool.putRequest()`` and catch ``Queue.Full`` exceptions.

        """
        self._requests_queue = Queue.Queue(q_size)
        self._results_queue = Queue.Queue(resq_size)
        self.workers = []
        self.dismissedWorkers = []
        # maps requestID -> WorkRequest for every request still pending
        self.workRequests = {}
        self.createWorkers(num_workers, poll_timeout)

    def createWorkers(self, num_workers, poll_timeout=5):
        """Add num_workers worker threads to the pool.

        ``poll_timeout`` sets the interval in seconds (int or float) for how
        often threads should check whether they are dismissed, while waiting
        for requests.

        """
        for _ in range(num_workers):
            self.workers.append(WorkerThread(self._requests_queue,
                self._results_queue, poll_timeout=poll_timeout))

    def dismissWorkers(self, num_workers, do_join=False):
        """Tell num_workers worker threads to quit after their current task.

        At most ``len(self.workers)`` threads are dismissed. If ``do_join``
        is true the dismissed threads are joined right away, otherwise they
        are remembered in ``self.dismissedWorkers`` so they can be joined
        later with ``joinAllDismissedWorkers``.

        """
        dismiss_list = []
        for _ in range(min(num_workers, len(self.workers))):
            worker = self.workers.pop()
            worker.dismiss()
            dismiss_list.append(worker)

        if do_join:
            for worker in dismiss_list:
                worker.join()
        else:
            self.dismissedWorkers.extend(dismiss_list)

    def joinAllDismissedWorkers(self):
        """Perform Thread.join() on all worker threads that have been dismissed.
        """
        for worker in self.dismissedWorkers:
            worker.join()
        self.dismissedWorkers = []

    def putRequest(self, request, block=True, timeout=None):
        """Put work request into work queue and save its id for later.

        ``block`` and ``timeout`` are passed on to ``Queue.put``, so
        ``Queue.Full`` may be raised when the request queue is size-limited.
        The request is only registered as pending after it was queued
        successfully.

        """
        assert isinstance(request, WorkRequest)
        # don't reuse old work requests
        assert not getattr(request, 'exception', None)
        self._requests_queue.put(request, block, timeout)
        self.workRequests[request.requestID] = request

    def poll(self, block=False):
        """Process any new results in the queue.

        Raises ``NoResultsPending`` when no work requests are outstanding
        and ``NoWorkersAvailable`` when ``block`` is true but there are no
        worker threads left to produce results. With ``block`` false the
        method returns as soon as the results queue is empty.

        """
        while True:
            # still results pending?
            if not self.workRequests:
                raise NoResultsPending
            # are there still workers to process remaining requests?
            elif block and not self.workers:
                raise NoWorkersAvailable
            # Only the queue access is guarded: a ``Queue.Empty`` raised
            # inside a user callback must not be mistaken for an empty
            # results queue.
            try:
                # get back next results
                request, result = self._results_queue.get(block=block)
            except Queue.Empty:
                break
            # has an exception occurred?
            if request.exception and request.exc_callback:
                request.exc_callback(request, result)
            # hand results to callback, if any
            if request.callback and not \
                   (request.exception and request.exc_callback):
                request.callback(request, result)
            del self.workRequests[request.requestID]

    def wait(self):
        """Wait for results, blocking until all have arrived."""
        while True:
            try:
                self.poll(True)
            except NoResultsPending:
                break
################
# USAGE EXAMPLE
################

if __name__ == '__main__':
    import random
    import time

    # the work the threads will have to do (rather trivial in our example)
    def do_something(data):
        time.sleep(random.randint(1,5))
        result = round(random.random() * data, 5)
        # just to show off, we throw an exception once in a while
        if result > 5:
            raise RuntimeError("Something extraordinary happened!")
        return result

    # this will be called each time a result is available
    # NOTE(review): the definition was missing from this copy of the file
    # although it is referenced below; the message text is a reconstruction.
    def print_result(request, result):
        print("**** Result from request #%s: %r" % (request.requestID, result))

    # this will be called when an exception occurs within a thread
    # this example exception handler does little more than the default handler
    def handle_exception(request, exc_info):
        if not isinstance(exc_info, tuple):
            # Something is seriously wrong...
            print(request)
            print(exc_info)
            raise SystemExit
        print("**** Exception occured in request #%s: %s" % \
          (request.requestID, exc_info))

    # assemble the arguments for each job to a list...
    data = [random.randint(1,10) for i in range(20)]
    # ... and build a WorkRequest object for each item in data
    requests = makeRequests(do_something, data, print_result, handle_exception)
    # to use the default exception handler, uncomment next line and comment out
    # the preceding one.
    #requests = makeRequests(do_something, data, print_result)

    # or the other form of args_lists accepted by makeRequests: ((,), {})
    data = [((random.randint(1,10),), {}) for i in range(20)]
    requests.extend(
        makeRequests(do_something, data, print_result, handle_exception)
        # to use the default exception handler, uncomment next line and comment
        # out the preceding one.
        #makeRequests(do_something, data, print_result)
    )

    # we create a pool of 3 worker threads
    print("Creating thread pool with 3 worker threads.")
    main = ThreadPool(3)

    # then we put the work requests in the queue...
    for req in requests:
        main.putRequest(req)
        print("Work request #%s added." % req.requestID)
    # or shorter:
    # [main.putRequest(req) for req in requests]

    # ...and wait for the results to arrive in the result queue
    # by using ThreadPool.wait(). This would block until results for
    # all work requests have arrived:
    # main.wait()

    # instead we can poll for results while doing something else:
    i = 0
    while True:
        try:
            time.sleep(0.5)
            main.poll()
            print("Main thread working...")
            # active_count() replaces the deprecated activeCount() alias;
            # subtract one for the main thread itself
            print("(active worker threads: %i)" % (threading.active_count()-1, ))
            if i == 10:
                print("**** Adding 3 more worker threads...")
                main.createWorkers(3)
            if i == 20:
                print("**** Dismissing 2 worker threads...")
                main.dismissWorkers(2)
            i += 1
        except KeyboardInterrupt:
            print("**** Interrupted!")
            break
        except NoResultsPending:
            print("**** No pending results.")
            break
    if main.dismissedWorkers:
        print("Joining all dismissed worker threads...")
        main.joinAllDismissedWorkers()