1
2 """Easy to use object-oriented thread pool framework.
3
4 A thread pool is an object that maintains a pool of worker threads to perform
5 time consuming operations in parallel. It assigns jobs to the threads
6 by putting them in a work request queue, where they are picked up by the
7 next available thread. This then performs the requested operation in the
8 background and puts the results in another queue.
9
10 The thread pool object can then collect the results from all threads from
11 this queue as soon as they become available or after all threads have
12 finished their work. It's also possible to define callbacks to handle
13 each result as it comes in.
14
15 The basic concept and some code was taken from the book "Python in a Nutshell,
16 2nd edition" by Alex Martelli, O'Reilly 2006, ISBN 0-596-10046-9, from section
17 14.5 "Threaded Program Architecture". I wrapped the main program logic in the
18 ThreadPool class, added the WorkRequest class and the callback system and
19 tweaked the code here and there. Kudos also to Florent Aide for the exception
20 handling mechanism.
21
22 Basic usage::
23
24 >>> pool = ThreadPool(poolsize)
25 >>> requests = makeRequests(some_callable, list_of_args, callback)
26 >>> [pool.putRequest(req) for req in requests]
27 >>> pool.wait()
28
29 See the end of the module code for a brief, annotated usage example.
30
31 Website : http://chrisarndt.de/projects/threadpool/
32
33 """
# Module metadata. ``__all__`` declares the public API of the module.
__docformat__ = "restructuredtext en"

__all__ = [
    'makeRequests',
    'NoResultsPending',
    'NoWorkersAvailable',
    'ThreadPool',
    'WorkRequest',
    'WorkerThread',
]

__author__ = "Christopher Arndt"
__version__ = '1.3.1'
__revision__ = "$Revision$"
__date__ = "$Date$"
__license__ = "MIT license"
50
51
52
53 import sys
54 import threading
55 import traceback
56
57 try:
58 import Queue
59 except ImportError:
60 import queue as Queue
61
62
63
class NoResultsPending(Exception):
    """All work requests have been processed."""
67
class NoWorkersAvailable(Exception):
    """No worker threads available to process remaining requests."""
71
72
73
def _handle_thread_exception(request, exc_info):
    """Default exception handler callback function.

    This just prints the exception info via ``traceback.print_exception``.

    ``request`` is the work request whose callable raised; it is accepted
    to satisfy the two-argument callback signature but is not used here.
    ``exc_info`` is unpacked into the positional arguments of
    ``traceback.print_exception``.

    """
    traceback.print_exception(*exc_info)
81
82
83
def makeRequests(callable_, args_list, callback=None,
        exc_callback=_handle_thread_exception):
    """Create several work requests for same callable with different arguments.

    Convenience function for creating several work requests for the same
    callable where each invocation of the callable receives different values
    for its arguments.

    ``args_list`` contains the parameters for each invocation of callable.
    Each item in ``args_list`` should be either a 2-item tuple of the list of
    positional arguments and a dictionary of keyword arguments or a single,
    non-tuple argument.

    See docstring for ``WorkRequest`` for info on ``callback`` and
    ``exc_callback``.

    Returns a list with one ``WorkRequest`` per item, in the order of
    ``args_list``.

    """
    requests = []
    for item in args_list:
        if isinstance(item, tuple):
            # (positional args, keyword args) pair
            args, kwds = item[0], item[1]
        else:
            # a single bare argument is passed as the only positional one
            args, kwds = [item], None
        requests.append(
            WorkRequest(callable_, args, kwds, callback=callback,
                exc_callback=exc_callback)
        )
    return requests
114
115
116
class WorkerThread(threading.Thread):
    """Background thread connected to the requests/results queues.

    A worker thread sits in the background and picks up work requests from
    one queue and puts the results in another until it is dismissed.

    """

    def __init__(self, requests_queue, results_queue, poll_timeout=5, **kwds):
        """Set up thread in daemonic mode and start it immediately.

        ``requests_queue`` and ``results_queue`` are instances of
        ``Queue.Queue`` passed by the ``ThreadPool`` class when it creates a
        new worker thread.

        ``poll_timeout`` is the timeout passed to each blocking ``get`` on
        the requests queue; the dismissed flag is re-checked whenever that
        timeout expires. Any extra keyword arguments are forwarded to
        ``threading.Thread.__init__``.

        """
        threading.Thread.__init__(self, **kwds)
        # ``daemon`` attribute instead of the deprecated ``setDaemon()``
        self.daemon = True
        self._requests_queue = requests_queue
        self._results_queue = results_queue
        self._poll_timeout = poll_timeout
        self._dismissed = threading.Event()
        self.start()

    def run(self):
        """Repeatedly process the job queue until told to exit."""
        while True:
            if self._dismissed.is_set():
                # we are dismissed, break out of loop
                break
            # get next work request; if none arrives within
            # self._poll_timeout, loop again to re-check the dismissed flag
            try:
                request = self._requests_queue.get(True, self._poll_timeout)
            except Queue.Empty:
                continue
            else:
                if self._dismissed.is_set():
                    # we are dismissed, put back request in queue and exit
                    self._requests_queue.put(request)
                    break
                try:
                    result = request.callable(*request.args, **request.kwds)
                    self._results_queue.put((request, result))
                except:  # noqa: E722
                    # Deliberately catch everything: any failure in the
                    # callable is reported through the results queue as
                    # the ``sys.exc_info()`` tuple instead of a result.
                    request.exception = True
                    self._results_queue.put((request, sys.exc_info()))

    def dismiss(self):
        """Sets a flag to tell the thread to exit when done with current job.
        """
        self._dismissed.set()
170
171
class WorkRequest(object):
    """A request to execute a callable for putting in the request queue later.

    See the module function ``makeRequests`` for the common case
    where you want to build several ``WorkRequest`` objects for the same
    callable but with different arguments for each call.

    """

    def __init__(self, callable_, args=None, kwds=None, requestID=None,
            callback=None, exc_callback=_handle_thread_exception):
        """Create a work request for a callable and attach callbacks.

        A work request consists of a callable to be executed by a
        worker thread, a list of positional arguments and a dictionary
        of keyword arguments.

        A ``callback`` function can be specified, that is called when the
        results of the request are picked up from the result queue. It must
        accept two anonymous arguments, the ``WorkRequest`` object and the
        results of the callable, in that order. If you want to pass additional
        information to the callback, just stick it on the request object.

        You can also give a custom callback for when an exception occurs with
        the ``exc_callback`` keyword parameter. It should also accept two
        anonymous arguments, the ``WorkRequest`` and a tuple with the exception
        details as returned by ``sys.exc_info()``. The default implementation
        of this callback just prints the exception info via
        ``traceback.print_exception``. If you want no exception handler
        callback, just pass in ``None``.

        ``requestID``, if given, must be hashable since it is used by
        ``ThreadPool`` object to store the results of that work request in a
        dictionary. Note that the value stored in ``self.requestID`` is
        ``hash(requestID)``, not the object passed in. It defaults to the
        return value of ``id(self)``.

        Raises ``TypeError`` if ``requestID`` is not hashable.

        """
        if requestID is None:
            self.requestID = id(self)
        else:
            try:
                self.requestID = hash(requestID)
            except TypeError:
                raise TypeError("requestID must be hashable.")
        # set to True by the worker thread when the callable raised
        self.exception = False
        self.callback = callback
        self.exc_callback = exc_callback
        self.callable = callable_
        # falsy ``args`` / ``kwds`` are replaced by fresh empty containers
        self.args = args or []
        self.kwds = kwds or {}

    def __str__(self):
        """Return a short description with ID, arguments and exception flag."""
        return "<WorkRequest id=%s args=%r kwargs=%r exception=%s>" % \
            (self.requestID, self.args, self.kwds, self.exception)
225
class ThreadPool(object):
    """A thread pool, distributing work requests and collecting results.

    See the module docstring for more information.

    """

    def __init__(self, num_workers, q_size=0, resq_size=0, poll_timeout=5):
        """Set up the thread pool and start num_workers worker threads.

        ``num_workers`` is the number of worker threads to start initially.

        If ``q_size > 0`` the size of the work *request queue* is limited and
        the thread pool blocks when the queue is full and it tries to put
        more work requests in it (see ``putRequest`` method), unless you also
        use a positive ``timeout`` value for ``putRequest``.

        If ``resq_size > 0`` the size of the *results queue* is limited and the
        worker threads will block when the queue is full and they try to put
        new results in it.

        ``poll_timeout`` is handed to ``createWorkers`` for the initial
        worker threads.

        .. warning::
            If you set both ``q_size`` and ``resq_size`` to ``!= 0`` there is
            the possibility of a deadlock, when the results queue is not pulled
            regularly and too many jobs are put in the work requests queue.
            To prevent this, always set ``timeout > 0`` when calling
            ``ThreadPool.putRequest()`` and catch ``Queue.Full`` exceptions.

        """
        self._requests_queue = Queue.Queue(q_size)
        self._results_queue = Queue.Queue(resq_size)
        self.workers = []
        self.dismissedWorkers = []
        # maps requestID -> WorkRequest for requests whose result is pending
        self.workRequests = {}
        self.createWorkers(num_workers, poll_timeout)

    def createWorkers(self, num_workers, poll_timeout=5):
        """Add num_workers worker threads to the pool.

        ``poll_timeout`` sets the interval in seconds (int or float) for how
        often threads should check whether they are dismissed, while waiting
        for requests.

        """
        for _ in range(num_workers):
            self.workers.append(WorkerThread(self._requests_queue,
                self._results_queue, poll_timeout=poll_timeout))

    def dismissWorkers(self, num_workers, do_join=False):
        """Tell num_workers worker threads to quit after their current task.

        At most ``len(self.workers)`` threads are dismissed. If ``do_join``
        is true the dismissed threads are joined right away; otherwise they
        are remembered in ``self.dismissedWorkers`` so that they can be
        joined later with ``joinAllDismissedWorkers``.

        """
        dismiss_list = []
        for _ in range(min(num_workers, len(self.workers))):
            worker = self.workers.pop()
            worker.dismiss()
            dismiss_list.append(worker)

        if do_join:
            for worker in dismiss_list:
                worker.join()
        else:
            self.dismissedWorkers.extend(dismiss_list)

    def joinAllDismissedWorkers(self):
        """Perform Thread.join() on all worker threads that have been dismissed.
        """
        for worker in self.dismissedWorkers:
            worker.join()
        self.dismissedWorkers = []

    def putRequest(self, request, block=True, timeout=None):
        """Put work request into work queue and save its id for later.

        ``block`` and ``timeout`` are passed on to ``Queue.put``. The request
        is only registered in ``self.workRequests`` after the ``put``
        succeeded.

        """
        assert isinstance(request, WorkRequest)
        # don't reuse old work requests
        assert not getattr(request, 'exception', None)
        self._requests_queue.put(request, block, timeout)
        self.workRequests[request.requestID] = request

    def poll(self, block=False):
        """Process any new results in the queue.

        For every result taken from the results queue the matching callback
        is invoked: ``exc_callback`` if the request raised and has one,
        otherwise ``callback`` if set. The request is then removed from
        ``self.workRequests``.

        Raises ``NoResultsPending`` when no registered request is waiting
        for a result, and ``NoWorkersAvailable`` when ``block`` is true and
        the pool has no workers. With ``block`` false the method returns as
        soon as the results queue is empty.

        """
        while True:
            # still results pending?
            if not self.workRequests:
                raise NoResultsPending
            # are there still workers to process remaining requests?
            elif block and not self.workers:
                raise NoWorkersAvailable
            # Only the queue read is guarded, so a ``Queue.Empty`` raised
            # inside a user callback is not mistaken for an empty queue.
            try:
                request, result = self._results_queue.get(block=block)
            except Queue.Empty:
                break
            try:
                if request.exception and request.exc_callback:
                    # has an exception occurred and is there a handler?
                    request.exc_callback(request, result)
                elif request.callback:
                    # hand results to callback, if any
                    request.callback(request, result)
            finally:
                # The result has been consumed either way. Forget the
                # request even if the callback raised, otherwise a later
                # wait() would block forever on a result that never comes.
                self.workRequests.pop(request.requestID, None)

    def wait(self):
        """Wait for results, blocking until all have arrived."""
        while True:
            try:
                self.poll(True)
            except NoResultsPending:
                break
333
334
335
336
337
338
################
# USAGE EXAMPLE
################

if __name__ == '__main__':
    import random
    import time

    # the work the threads will have to do (rather trivial in our example)
    def do_something(data):
        """Sleep 1-5 seconds, then return a random value scaled by ``data``."""
        time.sleep(random.randint(1,5))
        result = round(random.random() * data, 5)
        # just to show off, we throw an exception once in a while
        if result > 5:
            raise RuntimeError("Something extraordinary happened!")
        return result

    # this will be called each time a result is available
    def print_result(request, result):
        """Report the result of a successfully processed request."""
        print("**** Result from request #%s: %r" % (request.requestID, result))

    # this will be called when an exception occurs within a thread
    # this example exception handler does little more than the default handler
    def handle_exception(request, exc_info):
        """Report the exception info of a request whose callable raised."""
        if not isinstance(exc_info, tuple):
            # Something is seriously wrong...
            print(request)
            print(exc_info)
            raise SystemExit
        print("**** Exception occured in request #%s: %s" % \
          (request.requestID, exc_info))

    # assemble the arguments for each job to a list...
    data = [random.randint(1,10) for i in range(20)]
    # ... and build a WorkRequest object for each item in data
    requests = makeRequests(do_something, data, print_result, handle_exception)
    # to use the default exception handler, leave out the last argument

    # or the other form of args_list accepted by makeRequests: ((,), {})
    data = [((random.randint(1,10),), {}) for i in range(20)]
    requests.extend(
        makeRequests(do_something, data, print_result, handle_exception)
    )

    # we create a pool of 3 worker threads
    print("Creating thread pool with 3 worker threads.")
    main = ThreadPool(3)

    # then we put the work requests in the queue...
    for req in requests:
        main.putRequest(req)
        print("Work request #%s added." % req.requestID)

    # ...and poll for results while doing something else in the main thread
    i = 0
    while True:
        try:
            time.sleep(0.5)
            main.poll()
            print("Main thread working...")
            # ``active_count`` replaces the deprecated ``activeCount`` alias
            print("(active worker threads: %i)" % (threading.active_count()-1, ))
            if i == 10:
                print("**** Adding 3 more worker threads...")
                main.createWorkers(3)
            if i == 20:
                print("**** Dismissing 2 worker threads...")
                main.dismissWorkers(2)
            i += 1
        except KeyboardInterrupt:
            print("**** Interrupted!")
            break
        except NoResultsPending:
            print("**** No pending results.")
            break
    if main.dismissedWorkers:
        print("Joining all dismissed worker threads...")
        main.joinAllDismissedWorkers()
424