Module threadpool

Source Code for Module threadpool

# -*- coding: UTF-8 -*-
"""Easy to use object-oriented thread pool framework.

A thread pool is an object that maintains a pool of worker threads to perform
time-consuming operations in parallel. It assigns jobs to the threads
by putting them in a work request queue, where they are picked up by the
next available thread. This thread then performs the requested operation in
the background and puts the results in another queue.

The thread pool object can then collect the results from all threads from
this queue as soon as they become available or after all threads have
finished their work. It's also possible to define callbacks to handle
each result as it comes in.

The basic concept and some code were taken from the book "Python in a Nutshell"
by Alex Martelli, copyright 2003, ISBN 0-596-00188-6, from section 14.5
"Threaded Program Architecture". I wrapped the main program logic in the
ThreadPool class, added the WorkRequest class and the callback system and
tweaked the code here and there. Kudos also to Florent Aide for the exception
handling mechanism.

Basic usage::

    >>> pool = ThreadPool(poolsize)
    >>> requests = makeRequests(some_callable, list_of_args, callback)
    >>> [pool.putRequest(req) for req in requests]
    >>> pool.wait()

See the end of the module code for a brief, annotated usage example.

Website: http://chrisarndt.de/projects/threadpool/

"""

__all__ = [
    'makeRequests',
    'NoResultsPending',
    'NoWorkersAvailable',
    'ThreadPool',
    'WorkRequest',
    'WorkerThread'
]

__author__ = "Christopher Arndt"
__version__ = "1.2.4"
__revision__ = "$Revision: 281 $"
__date__ = "$Date: 2008-05-04 17:41:41 +0200 (So, 04 Mai 2008) $"
__license__ = 'MIT license'


# standard library modules
import sys
import threading
import Queue
import traceback


# exceptions
class NoResultsPending(Exception):
    """All work requests have been processed."""
    pass

class NoWorkersAvailable(Exception):
    """No worker threads available to process remaining requests."""
    pass


# internal module helper functions
def _handle_thread_exception(request, exc_info):
    """Default exception handler callback function.

    This just prints the exception info via ``traceback.print_exception``.

    """
    traceback.print_exception(*exc_info)


# utility functions
def makeRequests(callable_, args_list, callback=None,
        exc_callback=_handle_thread_exception):
    """Create several work requests for the same callable with different arguments.

    Convenience function for creating several work requests for the same
    callable where each invocation of the callable receives different values
    for its arguments.

    ``args_list`` contains the parameters for each invocation of callable.
    Each item in ``args_list`` should be either a 2-item tuple of the list of
    positional arguments and a dictionary of keyword arguments or a single,
    non-tuple argument. A short sketch of both forms follows this function.

    See docstring for ``WorkRequest`` for info on ``callback`` and
    ``exc_callback``.

    """
    requests = []
    for item in args_list:
        if isinstance(item, tuple):
            requests.append(
                WorkRequest(callable_, item[0], item[1], callback=callback,
                    exc_callback=exc_callback)
            )
        else:
            requests.append(
                WorkRequest(callable_, [item], None, callback=callback,
                    exc_callback=exc_callback)
            )
    return requests
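
# A minimal sketch of the two item forms accepted by ``makeRequests`` above,
# assuming a hypothetical callable ``download(url, retries=0)``:
#
#   # each item is a single, non-tuple argument -> passed as the only
#   # positional argument
#   requests = makeRequests(download, ['http://a.example/', 'http://b.example/'])
#
#   # each item is a 2-item tuple: (list of positional args, dict of keyword args)
#   requests = makeRequests(download,
#       [(['http://a.example/'], {'retries': 3}),
#        (['http://b.example/'], {'retries': 1})])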


# classes
class WorkerThread(threading.Thread):
    """Background thread connected to the requests/results queues.

    A worker thread sits in the background and picks up work requests from
    one queue and puts the results in another until it is dismissed.

    """

    def __init__(self, requests_queue, results_queue, poll_timeout=5, **kwds):
        """Set up thread in daemonic mode and start it immediately.

        ``requests_queue`` and ``results_queue`` are instances of
        ``Queue.Queue`` passed by the ``ThreadPool`` class when it creates a
        new worker thread.

        """
        threading.Thread.__init__(self, **kwds)
        self.setDaemon(1)
        self._requests_queue = requests_queue
        self._results_queue = results_queue
        self._poll_timeout = poll_timeout
        self._dismissed = threading.Event()
        self.start()

    def run(self):
        """Repeatedly process the job queue until told to exit."""
        while True:
            if self._dismissed.isSet():
                # we are dismissed, break out of loop
                break
            # get next work request. If we don't get a new request from the
            # queue after self._poll_timeout seconds, we jump to the start of
            # the while loop again, to give the thread a chance to exit.
            try:
                request = self._requests_queue.get(True, self._poll_timeout)
            except Queue.Empty:
                continue
            else:
                if self._dismissed.isSet():
                    # we are dismissed, put back request in queue and exit loop
                    self._requests_queue.put(request)
                    break
                try:
                    result = request.callable(*request.args, **request.kwds)
                    self._results_queue.put((request, result))
                except:
                    request.exception = True
                    self._results_queue.put((request, sys.exc_info()))

    def dismiss(self):
        """Sets a flag to tell the thread to exit when done with current job."""
        self._dismissed.set()

class WorkRequest:
    """A request to execute a callable for putting in the request queue later.

    See the module function ``makeRequests`` for the common case
    where you want to build several ``WorkRequest`` objects for the same
    callable but with different arguments for each call.

    """

    def __init__(self, callable_, args=None, kwds=None, requestID=None,
            callback=None, exc_callback=_handle_thread_exception):
        """Create a work request for a callable and attach callbacks.

        A work request consists of a callable to be executed by a
        worker thread, a list of positional arguments, and a dictionary
        of keyword arguments.

        A ``callback`` function can be specified that is called when the
        results of the request are picked up from the result queue. It must
        accept two anonymous arguments, the ``WorkRequest`` object and the
        results of the callable, in that order. If you want to pass additional
        information to the callback, just stick it on the request object.

        You can also give a custom callback for when an exception occurs with
        the ``exc_callback`` keyword parameter. It should also accept two
        anonymous arguments, the ``WorkRequest`` and a tuple with the exception
        details as returned by ``sys.exc_info()``. The default implementation
        of this callback just prints the exception info via
        ``traceback.print_exception``. If you want no exception handler
        callback, just pass in ``None``.

        ``requestID``, if given, must be hashable since it is used by the
        ``ThreadPool`` object to store the results of that work request in a
        dictionary. It defaults to the return value of ``id(self)``.

        A brief sketch of constructing a request directly follows this class.

        """
        if requestID is None:
            self.requestID = id(self)
        else:
            try:
                self.requestID = hash(requestID)
            except TypeError:
                raise TypeError("requestID must be hashable.")
        self.exception = False
        self.callback = callback
        self.exc_callback = exc_callback
        self.callable = callable_
        self.args = args or []
        self.kwds = kwds or {}

    def __str__(self):
        return "<WorkRequest id=%s args=%r kwargs=%r exception=%s>" % \
            (self.requestID, self.args, self.kwds, self.exception)

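# A minimal sketch of building a single request directly instead of via
# ``makeRequests``; ``process_image`` and ``on_result`` are hypothetical
# user-supplied callables, ``pool`` an existing ``ThreadPool`` instance:
#
#   def on_result(request, result):
#       print "request #%s returned %r" % (request.requestID, result)
#
#   req = WorkRequest(process_image, args=['photo.png'], kwds={'size': 128},
#       callback=on_result)
#   pool.putRequest(req)
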
class ThreadPool:
    """A thread pool, distributing work requests and collecting results.

    See the module docstring for more information.

    """

    def __init__(self, num_workers, q_size=0, resq_size=0, poll_timeout=5):
        """Set up the thread pool and start num_workers worker threads.

        ``num_workers`` is the number of worker threads to start initially.

        If ``q_size > 0`` the size of the work *request queue* is limited and
        the thread pool blocks when the queue is full and it tries to put
        more work requests in it (see ``putRequest`` method), unless you also
        use a positive ``timeout`` value for ``putRequest``.

        If ``resq_size > 0`` the size of the *results queue* is limited and the
        worker threads will block when the queue is full and they try to put
        new results in it.

        .. warning::
            If you set both ``q_size`` and ``resq_size`` to ``!= 0`` there is
            the possibility of a deadlock, when the results queue is not pulled
            regularly and too many jobs are put in the work requests queue.
            To prevent this, always set ``timeout > 0`` when calling
            ``ThreadPool.putRequest()`` and catch ``Queue.Full`` exceptions
            (a brief sketch of this pattern follows the class definition).

        """
        self._requests_queue = Queue.Queue(q_size)
        self._results_queue = Queue.Queue(resq_size)
        self.workers = []
        self.workRequests = {}
        self.createWorkers(num_workers, poll_timeout)

    def createWorkers(self, num_workers, poll_timeout=5):
        """Add num_workers worker threads to the pool.

        ``poll_timeout`` sets the interval in seconds (int or float) for how
        often threads should check whether they are dismissed, while waiting
        for requests.

        """
        for i in range(num_workers):
            self.workers.append(WorkerThread(self._requests_queue,
                self._results_queue, poll_timeout=poll_timeout))

    def dismissWorkers(self, num_workers):
        """Tell num_workers worker threads to quit after their current task."""
        for i in range(min(num_workers, len(self.workers))):
            worker = self.workers.pop()
            worker.dismiss()

    def putRequest(self, request, block=True, timeout=0):
        """Put work request into work queue and save its id for later."""
        assert isinstance(request, WorkRequest)
        # don't reuse old work requests
        assert not getattr(request, 'exception', None)
        self._requests_queue.put(request, block, timeout)
        self.workRequests[request.requestID] = request

    def poll(self, block=False):
        """Process any new results in the queue."""
        while True:
            # still results pending?
            if not self.workRequests:
                raise NoResultsPending
            # are there still workers to process remaining requests?
            elif block and not self.workers:
                raise NoWorkersAvailable
            try:
                # get back next results
                request, result = self._results_queue.get(block=block)
                # has an exception occurred?
                if request.exception and request.exc_callback:
                    request.exc_callback(request, result)
                # hand results to callback, if any
                if request.callback and not \
                       (request.exception and request.exc_callback):
                    request.callback(request, result)
                del self.workRequests[request.requestID]
            except Queue.Empty:
                break

    def wait(self):
        """Wait for results, blocking until all have arrived."""
        while 1:
            try:
                self.poll(True)
            except NoResultsPending:
                break
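
# A minimal sketch of the bounded-queue pattern recommended in the
# ``ThreadPool.__init__`` warning above: pass a positive ``timeout`` to
# ``putRequest()``, catch ``Queue.Full`` and drain the results queue before
# retrying. ``pool`` and ``requests`` are assumed to exist already:
#
#   for req in requests:
#       while True:
#           try:
#               pool.putRequest(req, timeout=5)
#               break
#           except Queue.Full:
#               # request queue is full; process some results, then retry
#               try:
#                   pool.poll()
#               except NoResultsPending:
#                   pass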


################
# USAGE EXAMPLE
################

if __name__ == '__main__':
    import random
    import time

    # the work the threads will have to do (rather trivial in our example)
    def do_something(data):
        time.sleep(random.randint(1,5))
        result = round(random.random() * data, 5)
        # just to show off, we throw an exception once in a while
        if result > 5:
            raise RuntimeError("Something extraordinary happened!")
        return result

    # this will be called each time a result is available
    def print_result(request, result):
        print "**** Result from request #%s: %r" % (request.requestID, result)

    # this will be called when an exception occurs within a thread
    # this example exception handler does little more than the default handler
    def handle_exception(request, exc_info):
        if not isinstance(exc_info, tuple):
            # Something is seriously wrong...
            print request
            print exc_info
            raise SystemExit
        print "**** Exception occurred in request #%s: %s" % \
            (request.requestID, exc_info)

    # assemble the arguments for each job to a list...
    data = [random.randint(1,10) for i in range(20)]
    # ... and build a WorkRequest object for each item in data
    requests = makeRequests(do_something, data, print_result, handle_exception)
    # to use the default exception handler, uncomment next line and comment out
    # the preceding one.
    #requests = makeRequests(do_something, data, print_result)

    # or the other form of args_lists accepted by makeRequests: ((,), {})
    data = [((random.randint(1,10),), {}) for i in range(20)]
    requests.extend(
        makeRequests(do_something, data, print_result, handle_exception)
        # to use the default exception handler, uncomment next line and comment
        # out the preceding one.
        #makeRequests(do_something, data, print_result)
    )

    # we create a pool of 3 worker threads
    print "Creating thread pool with 3 worker threads."
    main = ThreadPool(3)

    # then we put the work requests in the queue...
    for req in requests:
        main.putRequest(req)
        print "Work request #%s added." % req.requestID
    # or shorter:
    # [main.putRequest(req) for req in requests]

    # ...and wait for the results to arrive in the result queue
    # by using ThreadPool.wait(). This would block until results for
    # all work requests have arrived:
    # main.wait()

    # instead we can poll for results while doing something else:
    i = 0
    while True:
        try:
            time.sleep(0.5)
            main.poll()
            print "Main thread working...",
            print "(active worker threads: %i)" % (threading.activeCount()-1, )
            if i == 10:
                print "**** Adding 3 more worker threads..."
                main.createWorkers(3)
            if i == 20:
                print "**** Dismissing 2 worker threads..."
                main.dismissWorkers(2)
            i += 1
        except KeyboardInterrupt:
            print "**** Interrupted!"
            break
        except NoResultsPending:
            print "**** No pending results."
            break