Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/more_itertools/more.py : 1%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import warnings
3from collections import Counter, defaultdict, deque, abc
4from collections.abc import Sequence
5from concurrent.futures import ThreadPoolExecutor
6from functools import partial, reduce, wraps
7from heapq import merge, heapify, heapreplace, heappop
8from itertools import (
9 chain,
10 compress,
11 count,
12 cycle,
13 dropwhile,
14 groupby,
15 islice,
16 repeat,
17 starmap,
18 takewhile,
19 tee,
20 zip_longest,
21)
22from math import exp, factorial, floor, log
23from queue import Empty, Queue
24from random import random, randrange, uniform
25from operator import itemgetter, mul, sub, gt, lt
26from sys import hexversion, maxsize
27from time import monotonic
29from .recipes import (
30 consume,
31 flatten,
32 pairwise,
33 powerset,
34 take,
35 unique_everseen,
36)
38__all__ = [
39 'AbortThread',
40 'adjacent',
41 'always_iterable',
42 'always_reversible',
43 'bucket',
44 'callback_iter',
45 'chunked',
46 'circular_shifts',
47 'collapse',
48 'collate',
49 'consecutive_groups',
50 'consumer',
51 'count_cycle',
52 'mark_ends',
53 'difference',
54 'distinct_combinations',
55 'distinct_permutations',
56 'distribute',
57 'divide',
58 'exactly_n',
59 'filter_except',
60 'first',
61 'groupby_transform',
62 'ilen',
63 'interleave_longest',
64 'interleave',
65 'intersperse',
66 'islice_extended',
67 'iterate',
68 'ichunked',
69 'is_sorted',
70 'last',
71 'locate',
72 'lstrip',
73 'make_decorator',
74 'map_except',
75 'map_reduce',
76 'nth_or_last',
77 'nth_permutation',
78 'nth_product',
79 'numeric_range',
80 'one',
81 'only',
82 'padded',
83 'partitions',
84 'set_partitions',
85 'peekable',
86 'repeat_last',
87 'replace',
88 'rlocate',
89 'rstrip',
90 'run_length',
91 'sample',
92 'seekable',
93 'SequenceView',
94 'side_effect',
95 'sliced',
96 'sort_together',
97 'split_at',
98 'split_after',
99 'split_before',
100 'split_when',
101 'split_into',
102 'spy',
103 'stagger',
104 'strip',
105 'substrings',
106 'substrings_indexes',
107 'time_limited',
108 'unique_to_each',
109 'unzip',
110 'windowed',
111 'with_iter',
112 'UnequalIterablesError',
113 'zip_equal',
114 'zip_offset',
115 'windowed_complete',
116 'all_unique',
117 'value_chain',
118 'product_index',
119 'combination_index',
120 'permutation_index',
121]
# Module-private sentinel: distinguishes "argument omitted" from an
# explicit ``None`` in the many functions here that accept a *default*.
_marker = object()
def chunked(iterable, n, strict=False):
    """Break *iterable* into lists of length *n*:

    >>> list(chunked([1, 2, 3, 4, 5, 6], 3))
    [[1, 2, 3], [4, 5, 6]]

    By default, the last yielded list will have fewer than *n* elements
    if the length of *iterable* is not divisible by *n*:

    >>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[1, 2, 3], [4, 5, 6], [7, 8]]

    To use a fill-in value instead, see the :func:`grouper` recipe.

    If the length of *iterable* is not divisible by *n* and *strict* is
    ``True``, then ``ValueError`` will be raised before the last
    list is yielded.

    :raises ValueError: if *strict* is ``True`` and *n* is ``None``, or if
        *strict* is ``True`` and the final chunk is short.
    """
    if strict and n is None:
        # With n=None each "chunk" is the whole iterable, so the length
        # check below would always fail with a misleading message.
        raise ValueError('n must not be None when using strict mode.')

    # Bind the iterator eagerly so repeated islice() calls advance it.
    source = iter(iterable)

    def chunks():
        while True:
            chunk = list(islice(source, n))
            if not chunk:
                return
            yield chunk

    if not strict:
        return chunks()

    def checked():
        for chunk in chunks():
            if len(chunk) != n:
                raise ValueError('iterable is not divisible by n.')
            yield chunk

    return iter(checked())
def first(iterable, default=_marker):
    """Return the first item of *iterable*, or *default* if *iterable* is
    empty.

    >>> first([0, 1, 2, 3])
    0
    >>> first([], 'some default')
    'some default'

    If *default* is not provided and there are no items in the iterable,
    raise ``ValueError``.

    :func:`first` is useful when you have a generator of expensive-to-retrieve
    values and want any arbitrary one. It is marginally shorter than
    ``next(iter(iterable), default)``.

    """
    try:
        return next(iter(iterable))
    except StopIteration as exc:
        if default is not _marker:
            return default
        raise ValueError(
            'first() was called on an empty iterable, and no '
            'default value was provided.'
        ) from exc
def last(iterable, default=_marker):
    """Return the last item of *iterable*, or *default* if *iterable* is
    empty.

    >>> last([0, 1, 2, 3])
    3
    >>> last([], 'some default')
    'some default'

    If *default* is not provided and there are no items in the iterable,
    raise ``ValueError``.
    """
    try:
        if isinstance(iterable, Sequence):
            # Sequences support O(1) access to the final element.
            return iterable[-1]
        # Work around https://bugs.python.org/issue38525: reversed() on
        # dict views is broken on exactly CPython 3.8.0.
        if hasattr(iterable, '__reversed__') and hexversion != 0x030800F0:
            return next(reversed(iterable))
        # Fall back to consuming the iterable, keeping only one item.
        return deque(iterable, maxlen=1)[-1]
    except (IndexError, TypeError, StopIteration):
        if default is _marker:
            raise ValueError(
                'last() was called on an empty iterable, and no default was '
                'provided.'
            )
        return default
def nth_or_last(iterable, n, default=_marker):
    """Return the nth or the last item of *iterable*,
    or *default* if *iterable* is empty.

    >>> nth_or_last([0, 1, 2, 3], 2)
    2
    >>> nth_or_last([0, 1], 2)
    1
    >>> nth_or_last([], 0, 'some default')
    'some default'

    If *default* is not provided and there are no items in the iterable,
    raise ``ValueError``.
    """
    # Take at most n + 1 items; the last of those is the answer.
    head = islice(iterable, n + 1)
    return last(head, default=default)
class peekable:
    """Wrap an iterator to allow lookahead and prepending elements.

    Call :meth:`peek` on the result to get the value that will be returned
    by :func:`next`. This won't advance the iterator:

        >>> p = peekable(['a', 'b'])
        >>> p.peek()
        'a'
        >>> next(p)
        'a'

    Pass :meth:`peek` a default value to return that instead of raising
    ``StopIteration`` when the iterator is exhausted.

        >>> p = peekable([])
        >>> p.peek('hi')
        'hi'

    peekables also offer a :meth:`prepend` method, which "inserts" items
    at the head of the iterable:

        >>> p = peekable([1, 2, 3])
        >>> p.prepend(10, 11, 12)
        >>> next(p)
        10
        >>> p.peek()
        11
        >>> list(p)
        [11, 12, 1, 2, 3]

    peekables can be indexed. Index 0 is the item that will be returned by
    :func:`next`, index 1 is the item after that, and so on:
    The values up to the given index will be cached.

        >>> p = peekable(['a', 'b', 'c', 'd'])
        >>> p[0]
        'a'
        >>> p[1]
        'b'
        >>> next(p)
        'a'

    Negative indexes are supported, but be aware that they will cache the
    remaining items in the source iterator, which may require significant
    storage.

    To check whether a peekable is exhausted, check its truth value:

        >>> p = peekable(['a', 'b'])
        >>> if p:  # peekable has items
        ...     list(p)
        ['a', 'b']
        >>> if not p:  # peekable is exhausted
        ...     list(p)
        []

    """

    def __init__(self, iterable):
        # _it is the source iterator; _cache holds items that have been
        # pulled from it (via peek/indexing) or prepended, in FIFO order.
        self._it = iter(iterable)
        self._cache = deque()

    def __iter__(self):
        return self

    def __bool__(self):
        # Truthy iff at least one more item is available.
        try:
            self.peek()
        except StopIteration:
            return False
        return True

    def peek(self, default=_marker):
        """Return the item that will be next returned from ``next()``.

        Return ``default`` if there are no items left. If ``default`` is not
        provided, raise ``StopIteration``.

        """
        if not self._cache:
            try:
                self._cache.append(next(self._it))
            except StopIteration:
                if default is _marker:
                    raise
                return default
        return self._cache[0]

    def prepend(self, *items):
        """Stack up items to be the next ones returned from ``next()`` or
        ``self.peek()``. The items will be returned in
        first in, first out order::

            >>> p = peekable([1, 2, 3])
            >>> p.prepend(10, 11, 12)
            >>> next(p)
            10
            >>> list(p)
            [11, 12, 1, 2, 3]

        It is possible, by prepending items, to "resurrect" a peekable that
        previously raised ``StopIteration``.

            >>> p = peekable([])
            >>> next(p)
            Traceback (most recent call last):
              ...
            StopIteration
            >>> p.prepend(1)
            >>> next(p)
            1
            >>> next(p)
            Traceback (most recent call last):
              ...
            StopIteration

        """
        # extendleft reverses its argument, so reverse first to keep
        # the items in first-in, first-out order.
        self._cache.extendleft(reversed(items))

    def __next__(self):
        # Serve cached/prepended items before touching the source iterator.
        if self._cache:
            return self._cache.popleft()

        return next(self._it)

    def _get_slice(self, index):
        # Normalize the slice's arguments; the defaults differ for
        # forward and backward steps.
        step = 1 if (index.step is None) else index.step
        if step > 0:
            start = 0 if (index.start is None) else index.start
            stop = maxsize if (index.stop is None) else index.stop
        elif step < 0:
            start = -1 if (index.start is None) else index.start
            stop = (-maxsize - 1) if (index.stop is None) else index.stop
        else:
            raise ValueError('slice step cannot be zero')

        # If either the start or stop index is negative, we'll need to cache
        # the rest of the iterable in order to slice from the right side.
        if (start < 0) or (stop < 0):
            self._cache.extend(self._it)
        # Otherwise we'll need to find the rightmost index and cache to that
        # point.
        else:
            n = min(max(start, stop) + 1, maxsize)
            cache_len = len(self._cache)
            if n >= cache_len:
                self._cache.extend(islice(self._it, n - cache_len))

        # Slice the materialized cache; nothing is consumed beyond it.
        return list(self._cache)[index]

    def __getitem__(self, index):
        if isinstance(index, slice):
            return self._get_slice(index)

        cache_len = len(self._cache)
        if index < 0:
            # Negative index: must cache everything to count from the end.
            self._cache.extend(self._it)
        elif index >= cache_len:
            # Cache just enough items to cover the requested index.
            self._cache.extend(islice(self._it, index + 1 - cache_len))

        return self._cache[index]
def collate(*iterables, **kwargs):
    """Return a sorted merge of the items from each of several already-sorted
    *iterables*.

    >>> list(collate('ACDZ', 'AZ', 'JKL'))
    ['A', 'A', 'C', 'D', 'J', 'K', 'L', 'Z', 'Z']

    Works lazily, keeping only the next value from each iterable in memory. Use
    :func:`collate` to, for example, perform a n-way mergesort of items that
    don't fit in memory.

    If a *key* function is specified, the iterables will be sorted according
    to its result:

    >>> key = lambda s: int(s)  # Sort by numeric value, not by string
    >>> list(collate(['1', '10'], ['2', '11'], key=key))
    ['1', '2', '10', '11']

    If the *iterables* are sorted in descending order, set *reverse* to
    ``True``:

    >>> list(collate([5, 3, 1], [4, 2, 0], reverse=True))
    [5, 4, 3, 2, 1, 0]

    If the elements of the passed-in iterables are out of order, you might get
    unexpected results.

    On Python 3.5+, this function is an alias for :func:`heapq.merge`.

    """
    # Deprecated shim: emit the warning, then hand everything to
    # heapq.merge, which implements the same contract.
    message = "collate is no longer part of more_itertools, use heapq.merge"
    warnings.warn(message, DeprecationWarning)
    return merge(*iterables, **kwargs)
def consumer(func):
    """Decorator that automatically advances a PEP-342-style "reverse iterator"
    to its first yield point so you don't have to call ``next()`` on it
    manually.

        >>> @consumer
        ... def tally():
        ...     i = 0
        ...     while True:
        ...         print('Thing number %s is %s.' % (i, (yield)))
        ...         i += 1
        ...
        >>> t = tally()
        >>> t.send('red')
        Thing number 0 is red.
        >>> t.send('fish')
        Thing number 1 is fish.

    Without the decorator, you would have to call ``next(t)`` before
    ``t.send()`` could be used.

    """

    @wraps(func)
    def primed(*args, **kwargs):
        generator = func(*args, **kwargs)
        # Advance to the first yield so .send() works immediately.
        next(generator)
        return generator

    return primed
def ilen(iterable):
    """Return the number of items in *iterable*.

    >>> ilen(x for x in range(1000000) if x % 3 == 0)
    333334

    This consumes the iterable, so handle with care.

    """
    # Zip the iterable against a counter and discard the pairs through a
    # zero-length deque; benchmarks (GitHub issues #236, #230) showed this
    # to be among the fastest known pure-Python approaches.
    tally = count()
    deque(zip(iterable, tally), maxlen=0)
    return next(tally)
def iterate(func, start):
    """Return ``start``, ``func(start)``, ``func(func(start))``, ...

    >>> from itertools import islice
    >>> list(islice(iterate(lambda x: 2*x, 1), 10))
    [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

    """
    current = start
    while True:
        yield current
        current = func(current)
def with_iter(context_manager):
    """Wrap an iterable in a ``with`` statement, so it closes once exhausted.

    For example, this will close the file when the iterator is exhausted::

        upper_lines = (line.upper() for line in with_iter(open('foo')))

    Any context manager which returns an iterable is a candidate for
    ``with_iter``.

    """
    # The context manager stays open for exactly as long as the
    # iterable is being consumed.
    with context_manager as managed_iterable:
        yield from managed_iterable
def one(iterable, too_short=None, too_long=None):
    """Return the first item from *iterable*, which is expected to contain only
    that item. Raise an exception if *iterable* is empty or has more than one
    item.

    :func:`one` is useful for ensuring that an iterable contains only one item.
    For example, it can be used to retrieve the result of a database query
    that is expected to return a single row.

    If *iterable* is empty, ``ValueError`` will be raised. You may specify a
    different exception with the *too_short* keyword:

        >>> it = []
        >>> one(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        ValueError: too few items in iterable (expected 1)
        >>> too_short = IndexError('too few items')
        >>> one(it, too_short=too_short)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        IndexError: too few items

    Similarly, if *iterable* contains more than one item, ``ValueError`` will
    be raised. You may specify a different exception with the *too_long*
    keyword:

        >>> it = ['too', 'many']
        >>> one(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        ValueError: Expected exactly one item in iterable, but got 'too',
        'many', and perhaps more.
        >>> too_long = RuntimeError
        >>> one(it, too_long=too_long)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        RuntimeError

    Note that :func:`one` attempts to advance *iterable* twice to ensure there
    is only one item. See :func:`spy` or :func:`peekable` to check iterable
    contents less destructively.

    """
    it = iter(iterable)

    try:
        first_value = next(it)
    except StopIteration as e:
        # Empty iterable: raise the caller's exception if given, else
        # a descriptive ValueError, chained to the StopIteration.
        raise (
            too_short or ValueError('too few items in iterable (expected 1)')
        ) from e

    try:
        second_value = next(it)
    except StopIteration:
        # Exactly one item -- the expected case.
        pass
    else:
        msg = (
            'Expected exactly one item in iterable, but got {!r}, {!r}, '
            'and perhaps more.'.format(first_value, second_value)
        )
        raise too_long or ValueError(msg)

    return first_value
def distinct_permutations(iterable, r=None):
    """Yield successive distinct permutations of the elements in *iterable*.

        >>> sorted(distinct_permutations([1, 0, 1]))
        [(0, 1, 1), (1, 0, 1), (1, 1, 0)]

    Equivalent to ``set(permutations(iterable))``, except duplicates are not
    generated and thrown away. For larger input sequences this is much more
    efficient.

    Duplicate permutations arise when there are duplicated elements in the
    input iterable. The number of items returned is
    `n! / (x_1! * x_2! * ... * x_n!)`, where `n` is the total number of
    items input, and each `x_i` is the count of a distinct item in the input
    sequence.

    If *r* is given, only the *r*-length permutations are yielded.

        >>> sorted(distinct_permutations([1, 0, 1], r=2))
        [(0, 1), (1, 0), (1, 1)]
        >>> sorted(distinct_permutations(range(3), r=2))
        [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]

    """
    # Algorithm: https://w.wiki/Qai
    # (Lexicographic "next permutation"; skips duplicates for free because
    # the input is sorted and each step produces a strictly greater tuple.)
    def _full(A):
        while True:
            # Yield the permutation we have
            yield tuple(A)

            # Find the largest index i such that A[i] < A[i + 1]
            for i in range(size - 2, -1, -1):
                if A[i] < A[i + 1]:
                    break
            # If no such index exists, this permutation is the last one
            else:
                return

            # Find the largest index j greater than i such that A[i] < A[j]
            for j in range(size - 1, i, -1):
                if A[i] < A[j]:
                    break

            # Swap the value of A[i] with that of A[j], then reverse the
            # sequence from A[i + 1] to form the new permutation
            A[i], A[j] = A[j], A[i]
            A[i + 1 :] = A[: i - size : -1]  # A[i + 1:][::-1]

    # Algorithm: modified from the above
    def _partial(A, r):
        # Split A into the first r items and the last r items
        head, tail = A[:r], A[r:]
        right_head_indexes = range(r - 1, -1, -1)
        left_tail_indexes = range(len(tail))

        while True:
            # Yield the permutation we have
            yield tuple(head)

            # Starting from the right, find the first index of the head with
            # value smaller than the maximum value of the tail - call it i.
            pivot = tail[-1]
            for i in right_head_indexes:
                if head[i] < pivot:
                    break
                pivot = head[i]
            else:
                return

            # Starting from the left, find the first value of the tail
            # with a value greater than head[i] and swap.
            for j in left_tail_indexes:
                if tail[j] > head[i]:
                    head[i], tail[j] = tail[j], head[i]
                    break
            # If we didn't find one, start from the right and find the first
            # index of the head with a value greater than head[i] and swap.
            else:
                for j in right_head_indexes:
                    if head[j] > head[i]:
                        head[i], head[j] = head[j], head[i]
                        break

            # Reverse head[i + 1:] and swap it with tail[:r - (i + 1)]
            tail += head[: i - r : -1]  # head[i + 1:][::-1]
            i += 1
            head[i:], tail[:] = tail[: r - i], tail[r - i :]

    items = sorted(iterable)

    size = len(items)
    if r is None:
        r = size

    if 0 < r <= size:
        return _full(items) if (r == size) else _partial(items, r)

    # r == 0 yields the single empty permutation; r > size yields nothing.
    return iter(() if r else ((),))
def intersperse(e, iterable, n=1):
    """Intersperse filler element *e* among the items in *iterable*, leaving
    *n* items between each filler element.

        >>> list(intersperse('!', [1, 2, 3, 4, 5]))
        [1, '!', 2, '!', 3, '!', 4, '!', 5]

        >>> list(intersperse(None, [1, 2, 3, 4, 5], n=2))
        [1, 2, None, 3, 4, None, 5]

    """
    if n == 0:
        raise ValueError('n must be > 0')
    if n == 1:
        # interleave(repeat(e), iterable) -> e, x_0, e, x_1, e, x_2...
        # Dropping the leading filler gives x_0, e, x_1, e, x_2...
        return islice(interleave(repeat(e), iterable), 1, None)

    # General case: alternate [e] with n-item chunks, drop the leading
    # filler chunk, then flatten.
    fillers = repeat([e])
    groups = chunked(iterable, n)
    return flatten(islice(interleave(fillers, groups), 1, None))
def unique_to_each(*iterables):
    """Return the elements from each of the input iterables that aren't in the
    other input iterables.

    For example, suppose you have a set of packages, each with a set of
    dependencies::

        {'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}}

    If you remove one package, which dependencies can also be removed?

    If ``pkg_1`` is removed, then ``A`` is no longer necessary - it is not
    associated with ``pkg_2`` or ``pkg_3``. Similarly, ``C`` is only needed for
    ``pkg_2``, and ``D`` is only needed for ``pkg_3``::

        >>> unique_to_each({'A', 'B'}, {'B', 'C'}, {'B', 'D'})
        [['A'], ['C'], ['D']]

    If there are duplicates in one input iterable that aren't in the others
    they will be duplicated in the output. Input order is preserved::

        >>> unique_to_each("mississippi", "missouri")
        [['p', 'p'], ['o', 'u', 'r']]

    It is assumed that the elements of each iterable are hashable.

    """
    # Materialize each input so it can be traversed twice.
    materialized = [list(it) for it in iterables]
    # Count how many *distinct* inputs each element appears in.
    occurrences = Counter(chain.from_iterable(set(pool) for pool in materialized))
    singletons = {element for element, n in occurrences.items() if n == 1}
    # Keep original order and multiplicity within each input.
    return [[item for item in pool if item in singletons] for pool in materialized]
739def windowed(seq, n, fillvalue=None, step=1):
740 """Return a sliding window of width *n* over the given iterable.
742 >>> all_windows = windowed([1, 2, 3, 4, 5], 3)
743 >>> list(all_windows)
744 [(1, 2, 3), (2, 3, 4), (3, 4, 5)]
746 When the window is larger than the iterable, *fillvalue* is used in place
747 of missing values:
749 >>> list(windowed([1, 2, 3], 4))
750 [(1, 2, 3, None)]
752 Each window will advance in increments of *step*:
754 >>> list(windowed([1, 2, 3, 4, 5, 6], 3, fillvalue='!', step=2))
755 [(1, 2, 3), (3, 4, 5), (5, 6, '!')]
757 To slide into the iterable's items, use :func:`chain` to add filler items
758 to the left:
760 >>> iterable = [1, 2, 3, 4]
761 >>> n = 3
762 >>> padding = [None] * (n - 1)
763 >>> list(windowed(chain(padding, iterable), 3))
764 [(None, None, 1), (None, 1, 2), (1, 2, 3), (2, 3, 4)]
765 """
766 if n < 0:
767 raise ValueError('n must be >= 0')
768 if n == 0:
769 yield tuple()
770 return
771 if step < 1:
772 raise ValueError('step must be >= 1')
774 window = deque(maxlen=n)
775 i = n
776 for _ in map(window.append, seq):
777 i -= 1
778 if not i:
779 i = step
780 yield tuple(window)
782 size = len(window)
783 if size < n:
784 yield tuple(chain(window, repeat(fillvalue, n - size)))
785 elif 0 < i < min(step, n):
786 window += (fillvalue,) * i
787 yield tuple(window)
def substrings(iterable):
    """Yield all of the substrings of *iterable*.

        >>> [''.join(s) for s in substrings('more')]
        ['m', 'o', 'r', 'e', 'mo', 'or', 're', 'mor', 'ore', 'more']

    Note that non-string iterables can also be subdivided.

        >>> list(substrings([0, 1, 2]))
        [(0,), (1,), (2,), (0, 1), (1, 2), (0, 1, 2)]

    """
    # First pass: emit the length-1 substrings while collecting the items.
    collected = []
    for element in iterable:
        collected.append(element)
        yield (element,)
    collected = tuple(collected)
    total = len(collected)

    # Then emit every longer contiguous run, shortest lengths first.
    for length in range(2, total + 1):
        for start in range(total - length + 1):
            yield collected[start : start + length]
def substrings_indexes(seq, reverse=False):
    """Yield all substrings and their positions in *seq*

    The items yielded will be a tuple of the form ``(substr, i, j)``, where
    ``substr == seq[i:j]``.

    This function only works for iterables that support slicing, such as
    ``str`` objects.

    >>> for item in substrings_indexes('more'):
    ...    print(item)
    ('m', 0, 1)
    ('o', 1, 2)
    ('r', 2, 3)
    ('e', 3, 4)
    ('mo', 0, 2)
    ('or', 1, 3)
    ('re', 2, 4)
    ('mor', 0, 3)
    ('ore', 1, 4)
    ('more', 0, 4)

    Set *reverse* to ``True`` to yield the same items in the opposite order.


    """
    # Iterate lengths ascending (or descending when reversed), and for
    # each length, every valid start position left to right.
    lengths = range(1, len(seq) + 1)
    if reverse:
        lengths = reversed(lengths)
    return (
        (seq[start : start + length], start, start + length)
        for length in lengths
        for start in range(len(seq) - length + 1)
    )
class bucket:
    """Wrap *iterable* and return an object that buckets it iterable into
    child iterables based on a *key* function.

        >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3']
        >>> s = bucket(iterable, key=lambda x: x[0])  # Bucket by 1st character
        >>> sorted(list(s))  # Get the keys
        ['a', 'b', 'c']
        >>> a_iterable = s['a']
        >>> next(a_iterable)
        'a1'
        >>> next(a_iterable)
        'a2'
        >>> list(s['b'])
        ['b1', 'b2', 'b3']

    The original iterable will be advanced and its items will be cached until
    they are used by the child iterables. This may require significant storage.

    By default, attempting to select a bucket to which no items belong  will
    exhaust the iterable and cache all values.
    If you specify a *validator* function, selected buckets will instead be
    checked against it.

        >>> from itertools import count
        >>> it = count(1, 2)  # Infinite sequence of odd numbers
        >>> key = lambda x: x % 10  # Bucket by last digit
        >>> validator = lambda x: x in {1, 3, 5, 7, 9}  # Odd digits only
        >>> s = bucket(it, key=key, validator=validator)
        >>> 2 in s
        False
        >>> list(s[2])
        []

    """

    def __init__(self, iterable, key, validator=None):
        # _cache maps each key to a deque of items not yet consumed by
        # that key's child iterator.
        self._it = iter(iterable)
        self._key = key
        self._cache = defaultdict(deque)
        # Without a validator, every key is considered plausible.
        self._validator = validator or (lambda x: True)

    def __contains__(self, value):
        if not self._validator(value):
            return False

        # Probe for one matching item; put it back if found so the
        # child iterator still sees it.
        try:
            item = next(self[value])
        except StopIteration:
            return False
        else:
            self._cache[value].appendleft(item)

        return True

    def _get_values(self, value):
        """
        Helper to yield items from the parent iterator that match *value*.
        Items that don't match are stored in the local cache as they
        are encountered.
        """
        while True:
            # If we've cached some items that match the target value, emit
            # the first one and evict it from the cache.
            if self._cache[value]:
                yield self._cache[value].popleft()
            # Otherwise we need to advance the parent iterator to search for
            # a matching item, caching the rest.
            else:
                while True:
                    try:
                        item = next(self._it)
                    except StopIteration:
                        return
                    item_value = self._key(item)
                    if item_value == value:
                        yield item
                        break
                    elif self._validator(item_value):
                        self._cache[item_value].append(item)

    def __iter__(self):
        # Exhaust the parent iterator, caching everything, so the full
        # set of keys is known; then yield the keys.
        for item in self._it:
            item_value = self._key(item)
            if self._validator(item_value):
                self._cache[item_value].append(item)

        yield from self._cache.keys()

    def __getitem__(self, value):
        # Invalid keys short-circuit to an empty iterator so an infinite
        # parent iterable is never scanned for them.
        if not self._validator(value):
            return iter(())

        return self._get_values(value)
def spy(iterable, n=1):
    """Return a 2-tuple with a list containing the first *n* elements of
    *iterable*, and an iterator with the same items as *iterable*.
    This allows you to "look ahead" at the items in the iterable without
    advancing it.

    There is one item in the list by default:

        >>> iterable = 'abcdefg'
        >>> head, iterable = spy(iterable)
        >>> head
        ['a']
        >>> list(iterable)
        ['a', 'b', 'c', 'd', 'e', 'f', 'g']

    You may use unpacking to retrieve items instead of lists:

        >>> (head,), iterable = spy('abcdefg')
        >>> head
        'a'
        >>> (first, second), iterable = spy('abcdefg', 2)
        >>> first
        'a'
        >>> second
        'b'

    The number of items requested can be larger than the number of items in
    the iterable:

        >>> iterable = [1, 2, 3, 4, 5]
        >>> head, iterable = spy(iterable, 10)
        >>> head
        [1, 2, 3, 4, 5]
        >>> list(iterable)
        [1, 2, 3, 4, 5]

    """
    it = iter(iterable)
    # Pull up to n items off the front...
    head = list(islice(it, n))
    # ...and return a copy, so mutating the returned list cannot disturb
    # the reconstructed iterator.
    return head.copy(), chain(head, it)
def interleave(*iterables):
    """Return a new iterable yielding from each iterable in turn,
    until the shortest is exhausted.

        >>> list(interleave([1, 2, 3], [4, 5], [6, 7, 8]))
        [1, 4, 6, 2, 5, 7]

    For a version that doesn't terminate after the shortest iterable is
    exhausted, see :func:`interleave_longest`.

    """
    # zip stops at the shortest input; flattening its rows interleaves.
    rounds = zip(*iterables)
    return chain.from_iterable(rounds)
def interleave_longest(*iterables):
    """Return a new iterable yielding from each iterable in turn,
    skipping any that are exhausted.

        >>> list(interleave_longest([1, 2, 3], [4, 5], [6, 7, 8]))
        [1, 4, 6, 2, 5, 7, 3, 8]

    This function produces the same output as :func:`roundrobin`, but may
    perform better for some inputs (in particular when the number of iterables
    is large).

    """
    # Pad the short inputs with the module sentinel, flatten, then drop
    # the sentinels -- exhausted inputs are thereby skipped.
    padded = zip_longest(*iterables, fillvalue=_marker)
    flattened = chain.from_iterable(padded)
    return (x for x in flattened if x is not _marker)
def collapse(iterable, base_type=None, levels=None):
    """Flatten an iterable with multiple levels of nesting (e.g., a list of
    lists of tuples) into non-iterable types.

        >>> iterable = [(1, 2), ([3, 4], [[5], [6]])]
        >>> list(collapse(iterable))
        [1, 2, 3, 4, 5, 6]

    Binary and text strings are not considered iterable and
    will not be collapsed.

    To avoid collapsing other types, specify *base_type*:

        >>> iterable = ['ab', ('cd', 'ef'), ['gh', 'ij']]
        >>> list(collapse(iterable, base_type=tuple))
        ['ab', ('cd', 'ef'), 'gh', 'ij']

    Specify *levels* to stop flattening after a certain level:

    >>> iterable = [('a', ['b']), ('c', ['d'])]
    >>> list(collapse(iterable))  # Fully flattened
    ['a', 'b', 'c', 'd']
    >>> list(collapse(iterable, levels=1))  # Only one level flattened
    ['a', ['b'], 'c', ['d']]

    """

    def walk(node, level):
        # Stop descending past the requested depth.
        if levels is not None and level > levels:
            yield node
            return
        # Strings/bytes are atoms, as is anything of base_type.
        if isinstance(node, (str, bytes)):
            yield node
            return
        if base_type is not None and isinstance(node, base_type):
            yield node
            return

        # Non-iterable objects are leaves; everything else recurses.
        try:
            children = iter(node)
        except TypeError:
            yield node
            return
        for child in children:
            yield from walk(child, level + 1)

    yield from walk(iterable, 0)
def side_effect(func, iterable, chunk_size=None, before=None, after=None):
    """Invoke *func* on each item in *iterable* (or on each *chunk_size* group
    of items) before yielding the item.

    `func` must be a function that takes a single argument. Its return value
    will be discarded.

    *before* and *after* are optional functions that take no arguments. They
    will be executed before iteration starts and after it ends, respectively.

    `side_effect` can be used for logging, updating progress bars, or anything
    that is not functionally "pure."

    Emitting a status message:

        >>> from more_itertools import consume
        >>> func = lambda item: print('Received {}'.format(item))
        >>> consume(side_effect(func, range(2)))
        Received 0
        Received 1

    Operating on chunks of items:

        >>> pair_sums = []
        >>> func = lambda chunk: pair_sums.append(sum(chunk))
        >>> list(side_effect(func, [0, 1, 2, 3, 4, 5], 2))
        [0, 1, 2, 3, 4, 5]
        >>> list(pair_sums)
        [1, 5, 9]

    Writing to a file-like object:

        >>> from io import StringIO
        >>> from more_itertools import consume
        >>> f = StringIO()
        >>> func = lambda x: print(x, file=f)
        >>> before = lambda: print(u'HEADER', file=f)
        >>> after = f.close
        >>> it = [u'a', u'b', u'c']
        >>> consume(side_effect(func, it, before=before, after=after))
        >>> f.closed
        True

    """
    try:
        if before is not None:
            before()

        if chunk_size is None:
            # Per-item mode: call func on each item as it passes through.
            for item in iterable:
                func(item)
                yield item
        else:
            # Chunked mode: call func once per chunk, then re-flatten.
            for group in chunked(iterable, chunk_size):
                func(group)
                yield from group
    finally:
        # Runs whether iteration finished, raised, or was abandoned.
        if after is not None:
            after()
def sliced(seq, n, strict=False):
    """Yield slices of length *n* from the sequence *seq*.

        >>> list(sliced((1, 2, 3, 4, 5, 6), 3))
        [(1, 2, 3), (4, 5, 6)]

    By default, the last yielded slice will have fewer than *n* elements
    if the length of *seq* is not divisible by *n*:

        >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3))
        [(1, 2, 3), (4, 5, 6), (7, 8)]

    If the length of *seq* is not divisible by *n* and *strict* is
    ``True``, then ``ValueError`` will be raised before the last
    slice is yielded.

    This function will only work for iterables that support slicing.
    For non-sliceable iterables, see :func:`chunked`.

    """
    # Take successive n-wide slices until one comes back empty.
    slices = takewhile(len, (seq[i : i + n] for i in count(0, n)))
    if not strict:
        return slices

    def checked():
        for piece in slices:
            if len(piece) != n:
                raise ValueError("seq is not divisible by n.")
            yield piece

    return iter(checked())
def split_at(iterable, pred, maxsplit=-1, keep_separator=False):
    """Yield lists of items from *iterable*, where each list is delimited by
    an item where callable *pred* returns ``True``.

        >>> list(split_at('abcdcba', lambda x: x == 'b'))
        [['a'], ['c', 'd', 'c'], ['a']]

        >>> list(split_at(range(10), lambda n: n % 2 == 1))
        [[0], [2], [4], [6], [8], []]

    At most *maxsplit* splits are done. If *maxsplit* is not specified or -1,
    then there is no limit on the number of splits:

        >>> list(split_at(range(10), lambda n: n % 2 == 1, maxsplit=2))
        [[0], [2], [4, 5, 6, 7, 8, 9]]

    By default, the delimiting items are not included in the output.
    To include them, set *keep_separator* to ``True``.

        >>> list(split_at('abcdcba', lambda x: x == 'b', keep_separator=True))
        [['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']]

    """
    if maxsplit == 0:
        # No splitting allowed: emit everything as one group.
        yield list(iterable)
        return

    it = iter(iterable)
    group = []
    for item in it:
        if not pred(item):
            group.append(item)
            continue
        # Hit a separator: flush the current group.
        yield group
        if keep_separator:
            yield [item]
        if maxsplit == 1:
            # Budget exhausted: the rest is a single final group.
            yield list(it)
            return
        group = []
        maxsplit -= 1
    yield group
def split_before(iterable, pred, maxsplit=-1):
    """Yield lists of items from *iterable*, where each list ends just before
    an item for which callable *pred* returns ``True``:

    >>> list(split_before('OneTwo', lambda s: s.isupper()))
    [['O', 'n', 'e'], ['T', 'w', 'o']]

    >>> list(split_before(range(10), lambda n: n % 3 == 0))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]

    At most *maxsplit* splits are done. If *maxsplit* is not specified or -1,
    then there is no limit on the number of splits:

    >>> list(split_before(range(10), lambda n: n % 3 == 0, maxsplit=2))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]
    """
    if maxsplit == 0:
        yield list(iterable)
        return

    group = []
    iterator = iter(iterable)
    for item in iterator:
        # Only split when the current group is non-empty; a leading
        # separator item just starts the first group.
        if not group or not pred(item):
            group.append(item)
            continue
        yield group
        if maxsplit == 1:
            # Split budget exhausted: the separator leads the final group.
            yield [item] + list(iterator)
            return
        maxsplit -= 1
        group = [item]
    yield group
def split_after(iterable, pred, maxsplit=-1):
    """Yield lists of items from *iterable*, where each list ends with an
    item where callable *pred* returns ``True``:

    >>> list(split_after('one1two2', lambda s: s.isdigit()))
    [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']]

    >>> list(split_after(range(10), lambda n: n % 3 == 0))
    [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]]

    At most *maxsplit* splits are done. If *maxsplit* is not specified or -1,
    then there is no limit on the number of splits:

    >>> list(split_after(range(10), lambda n: n % 3 == 0, maxsplit=2))
    [[0], [1, 2, 3], [4, 5, 6, 7, 8, 9]]

    """
    if maxsplit == 0:
        yield list(iterable)
        return

    group = []
    iterator = iter(iterable)
    for item in iterator:
        group.append(item)
        # The group is never empty here (item was just appended), so the
        # predicate alone decides whether to split.
        if not pred(item):
            continue
        yield group
        if maxsplit == 1:
            # Split budget exhausted: the rest is one final group.
            yield list(iterator)
            return
        maxsplit -= 1
        group = []
    if group:
        yield group
def split_when(iterable, pred, maxsplit=-1):
    """Split *iterable* into pieces based on the output of *pred*.
    *pred* should be a function that takes successive pairs of items and
    returns ``True`` if the iterable should be split in between them.

    For example, to find runs of increasing numbers, split the iterable when
    element ``i`` is larger than element ``i + 1``:

    >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], lambda x, y: x > y))
    [[1, 2, 3, 3], [2, 5], [2, 4], [2]]

    At most *maxsplit* splits are done. If *maxsplit* is not specified or -1,
    then there is no limit on the number of splits:

    >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2],
    ...                 lambda x, y: x > y, maxsplit=2))
    [[1, 2, 3, 3], [2, 5], [2, 4, 2]]

    """
    if maxsplit == 0:
        yield list(iterable)
        return

    iterator = iter(iterable)
    try:
        previous = next(iterator)
    except StopIteration:
        # Empty input produces no groups at all.
        return

    group = [previous]
    for current in iterator:
        # pred sees each adjacent (previous, current) pair exactly once.
        if pred(previous, current):
            yield group
            if maxsplit == 1:
                # Split budget exhausted: everything left is one group.
                yield [current] + list(iterator)
                return
            maxsplit -= 1
            group = []
        group.append(current)
        previous = current

    yield group
def split_into(iterable, sizes):
    """Yield a list of sequential items from *iterable* of length 'n' for each
    integer 'n' in *sizes*.

    >>> list(split_into([1,2,3,4,5,6], [1,2,3]))
    [[1], [2, 3], [4, 5, 6]]

    If the sum of *sizes* is smaller than the length of *iterable*, then the
    remaining items of *iterable* will not be returned.

    >>> list(split_into([1,2,3,4,5,6], [2,3]))
    [[1, 2], [3, 4, 5]]

    If the sum of *sizes* is larger than the length of *iterable*, fewer items
    will be returned in the iteration that overruns *iterable* and further
    lists will be empty:

    >>> list(split_into([1,2,3,4], [1,2,3,4]))
    [[1], [2, 3], [4], []]

    When a ``None`` object is encountered in *sizes*, the returned list will
    contain items up to the end of *iterable* the same way that
    :func:`itertools.islice` does:

    >>> list(split_into([1,2,3,4,5,6,7,8,9,0], [2,3,None]))
    [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]]

    :func:`split_into` can be useful for grouping a series of items where the
    sizes of the groups are not uniform, e.g. when several columns of a table
    row together represent one feature (such as an x, y, z point).
    """
    # Work on an iterator so successive islice calls consume it in order.
    iterator = iter(iterable)

    for size in sizes:
        if size is None:
            # None means "everything that's left" — and ends the output.
            yield list(iterator)
            return
        yield list(islice(iterator, size))
def padded(iterable, fillvalue=None, n=None, next_multiple=False):
    """Yield the elements from *iterable*, followed by *fillvalue*, such that
    at least *n* items are emitted.

    >>> list(padded([1, 2, 3], '?', 5))
    [1, 2, 3, '?', '?']

    If *next_multiple* is ``True``, *fillvalue* will be emitted until the
    number of items emitted is a multiple of *n*::

    >>> list(padded([1, 2, 3, 4], n=3, next_multiple=True))
    [1, 2, 3, 4, None, None]

    If *n* is ``None``, *fillvalue* will be emitted indefinitely.

    """
    it = iter(iterable)
    if n is None:
        # No target length: pad forever.
        yield from chain(it, repeat(fillvalue))
    elif n < 1:
        raise ValueError('n must be at least 1')
    else:
        emitted = 0
        for item in it:
            yield item
            emitted += 1

        if next_multiple:
            shortfall = (n - emitted) % n
        else:
            # May be negative if the iterable was already long enough;
            # repeat() treats a non-positive count as zero.
            shortfall = n - emitted
        yield from repeat(fillvalue, shortfall)
def repeat_last(iterable, default=None):
    """After the *iterable* is exhausted, keep yielding its last element.

    >>> list(islice(repeat_last(range(3)), 5))
    [0, 1, 2, 2, 2]

    If the iterable is empty, yield *default* forever::

    >>> list(islice(repeat_last(range(0), 42), 5))
    [42, 42, 42, 42, 42]

    """
    # _marker distinguishes "no items were seen" from a last item of None.
    last = _marker
    for last in iterable:
        yield last
    fill = default if last is _marker else last
    yield from repeat(fill)
def distribute(n, iterable):
    """Distribute the items from *iterable* among *n* smaller iterables.

    >>> group_1, group_2 = distribute(2, [1, 2, 3, 4, 5, 6])
    >>> list(group_1)
    [1, 3, 5]
    >>> list(group_2)
    [2, 4, 6]

    If the length of *iterable* is not evenly divisible by *n*, then the
    length of the returned iterables will not be identical:

    >>> children = distribute(3, [1, 2, 3, 4, 5, 6, 7])
    >>> [list(c) for c in children]
    [[1, 4, 7], [2, 5], [3, 6]]

    If the length of *iterable* is smaller than *n*, then the last returned
    iterables will be empty:

    >>> children = distribute(5, [1, 2, 3])
    >>> [list(c) for c in children]
    [[1], [2], [3], [], []]

    This function uses :func:`itertools.tee` and may require significant
    storage. If you need the order items in the smaller iterables to match the
    original iterable, see :func:`divide`.

    """
    if n < 1:
        raise ValueError('n must be at least 1')

    # Each child takes every n-th item, starting at its own offset.
    copies = tee(iterable, n)
    return [islice(copy, offset, None, n) for offset, copy in enumerate(copies)]
def stagger(iterable, offsets=(-1, 0, 1), longest=False, fillvalue=None):
    """Yield tuples whose elements are offset from *iterable*.
    The amount by which the `i`-th item in each tuple is offset is given by
    the `i`-th item in *offsets*.

    >>> list(stagger([0, 1, 2, 3]))
    [(None, 0, 1), (0, 1, 2), (1, 2, 3)]
    >>> list(stagger(range(8), offsets=(0, 2, 4)))
    [(0, 2, 4), (1, 3, 5), (2, 4, 6), (3, 5, 7)]

    By default, the sequence will end when the final element of a tuple is the
    last item in the iterable. To continue until the first element of a tuple
    is the last item in the iterable, set *longest* to ``True``::

    >>> list(stagger([0, 1, 2, 3], longest=True))
    [(None, 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]

    By default, ``None`` will be used to replace offsets beyond the end of the
    sequence. Specify *fillvalue* to use some other value.

    """
    # One independent copy of the input per requested offset; zip_offset
    # does the actual shifting and zipping.
    streams = tee(iterable, len(offsets))
    return zip_offset(
        *streams, offsets=offsets, longest=longest, fillvalue=fillvalue
    )
class UnequalIterablesError(ValueError):
    """Raised by :func:`zip_equal` when its input iterables turn out to
    have different lengths.
    """

    def __init__(self, details=None):
        # *details*, when provided, is a (first_length, index, length)
        # triple identifying the first mismatching iterable.
        message = 'Iterables have different lengths'
        if details is not None:
            template = ': index 0 has length {}; index {} has length {}'
            message += template.format(*details)

        super().__init__(message)
def zip_equal(*iterables):
    """``zip`` the input *iterables* together, but raise
    ``UnequalIterablesError`` if they aren't all the same length.

    >>> it_1 = range(3)
    >>> it_2 = iter('abc')
    >>> list(zip_equal(it_1, it_2))
    [(0, 'a'), (1, 'b'), (2, 'c')]

    >>> it_1 = range(3)
    >>> it_2 = iter('abcd')
    >>> list(zip_equal(it_1, it_2)) # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    more_itertools.more.UnequalIterablesError: Iterables have different
    lengths

    """
    # Fast path: if every input supports len(), compare the sizes up front.
    try:
        expected = len(iterables[0])
        for index, candidate in enumerate(iterables[1:], 1):
            actual = len(candidate)
            if actual != expected:
                break
        else:
            # All sizes matched; defer to the built-in zip.
            return zip(*iterables)

        # A mismatch was found before zipping anything.
        raise UnequalIterablesError(details=(expected, index, actual))
    except TypeError:
        # At least one input has no len(); fall back to consuming them
        # in lockstep and erroring out when one ends early.
        return _zip_equal_generator(iterables)
def _zip_equal_generator(iterables):
    # Lazily zip unsized iterables; _marker padding appears exactly when
    # one input has run out before the others.
    for combo in zip_longest(*iterables, fillvalue=_marker):
        if any(entry is _marker for entry in combo):
            raise UnequalIterablesError()
        yield combo
def zip_offset(*iterables, offsets, longest=False, fillvalue=None):
    """``zip`` the input *iterables* together, but offset the `i`-th iterable
    by the `i`-th item in *offsets*.

    >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1)))
    [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e')]

    This can be used as a lightweight alternative to SciPy or pandas to analyze
    data sets in which some series have a lead or lag relationship.

    By default, the sequence will end when the shortest iterable is exhausted.
    To continue until the longest iterable is exhausted, set *longest* to
    ``True``.

    >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1), longest=True))
    [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e'), (None, 'f')]

    By default, ``None`` will be used to replace offsets beyond the end of the
    sequence. Specify *fillvalue* to use some other value.

    """
    if len(iterables) != len(offsets):
        raise ValueError("Number of iterables and offsets didn't match")

    shifted = []
    for iterable, offset in zip(iterables, offsets):
        if offset < 0:
            # Negative offset: pad the front with fill values.
            shifted.append(chain(repeat(fillvalue, -offset), iterable))
        elif offset > 0:
            # Positive offset: drop the first *offset* items.
            shifted.append(islice(iterable, offset, None))
        else:
            shifted.append(iterable)

    if longest:
        return zip_longest(*shifted, fillvalue=fillvalue)

    return zip(*shifted)
def sort_together(iterables, key_list=(0,), key=None, reverse=False):
    """Return the input iterables sorted together, with *key_list* as the
    priority for sorting. All iterables are trimmed to the length of the
    shortest one.

    This can be used like the sorting function in a spreadsheet. If each
    iterable represents a column of data, the key list determines which
    columns are used for sorting.

    By default, all iterables are sorted using the ``0``-th iterable::

        >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')]
        >>> sort_together(iterables)
        [(1, 2, 3, 4), ('d', 'c', 'b', 'a')]

    Set a different key list to sort according to another iterable.
    Specifying multiple keys dictates how ties are broken::

        >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')]
        >>> sort_together(iterables, key_list=(1, 2))
        [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')]

    To sort by a function of the elements of the iterable, pass a *key*
    function. Its arguments are the elements of the iterables corresponding to
    the key list::

        >>> names = ('a', 'b', 'c')
        >>> lengths = (1, 2, 3)
        >>> widths = (5, 2, 1)
        >>> def area(length, width):
        ...     return length * width
        >>> sort_together([names, lengths, widths], key_list=(1, 2), key=area)
        [('c', 'b', 'a'), (3, 2, 1), (1, 2, 5)]

    Set *reverse* to ``True`` to sort in descending order.

        >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True)
        [(3, 2, 1), ('a', 'b', 'c')]

    """
    if key is None:
        # Without a key function, sort directly on the selected columns.
        sort_key = itemgetter(*key_list)
    else:
        key_list = list(key_list)
        if len(key_list) == 1:
            # Single key column: pass that element alone to the key function.
            offset = key_list[0]

            def sort_key(zipped):
                return key(zipped[offset])

        else:
            # Multiple key columns: itemgetter yields a tuple of elements,
            # which are splatted into the key function.
            pick = itemgetter(*key_list)

            def sort_key(zipped):
                return key(*pick(zipped))

    # Zip rows together, sort them, then unzip back into columns.
    return list(
        zip(*sorted(zip(*iterables), key=sort_key, reverse=reverse))
    )
def unzip(iterable):
    """The inverse of :func:`zip`, this function disaggregates the elements
    of the zipped *iterable*.

    The ``i``-th iterable contains the ``i``-th element from each element
    of the zipped iterable. The first element is used to determine the
    length of the remaining elements.

    >>> iterable = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
    >>> letters, numbers = unzip(iterable)
    >>> list(letters)
    ['a', 'b', 'c', 'd']
    >>> list(numbers)
    [1, 2, 3, 4]

    This is similar to using ``zip(*iterable)``, but it avoids reading
    *iterable* into memory. Note, however, that this function uses
    :func:`itertools.tee` and thus may require significant storage.

    """
    # Peek at the first element to learn how many streams to produce.
    head, iterable = spy(iter(iterable))
    if not head:
        # Empty input, e.g. zip([], [], []) — nothing to disaggregate.
        return ()
    width = len(head[0])
    streams = tee(iterable, width)

    def make_getter(index):
        def getter(obj):
            try:
                return obj[index]
            except IndexError:
                # For "improperly zipped" inputs such as
                # iter([(1, 2, 3), (4, 5), (6,)]), a stream simply ends at
                # the first element that is too short. Raising StopIteration
                # from the callable terminates the enclosing map iterator.
                raise StopIteration

        return getter

    return tuple(map(make_getter(i), stream) for i, stream in enumerate(streams))
def divide(n, iterable):
    """Divide the elements from *iterable* into *n* parts, maintaining
    order.

    >>> group_1, group_2 = divide(2, [1, 2, 3, 4, 5, 6])
    >>> list(group_1)
    [1, 2, 3]
    >>> list(group_2)
    [4, 5, 6]

    If the length of *iterable* is not evenly divisible by *n*, then the
    length of the returned iterables will not be identical:

    >>> children = divide(3, [1, 2, 3, 4, 5, 6, 7])
    >>> [list(c) for c in children]
    [[1, 2, 3], [4, 5], [6, 7]]

    If the length of the iterable is smaller than n, then the last returned
    iterables will be empty:

    >>> children = divide(5, [1, 2, 3])
    >>> [list(c) for c in children]
    [[1], [2], [3], [], []]

    This function will exhaust the iterable before returning and may require
    significant storage. If order is not important, see :func:`distribute`,
    which does not first pull the iterable into memory.

    """
    if n < 1:
        raise ValueError('n must be at least 1')

    # Materialize the input only if it doesn't already support slicing.
    try:
        iterable[:0]
    except TypeError:
        seq = tuple(iterable)
    else:
        seq = iterable

    # The first r parts get q + 1 items; the rest get q.
    q, r = divmod(len(seq), n)

    parts = []
    start = 0
    for index in range(n):
        stop = start + q + (1 if index < r else 0)
        parts.append(iter(seq[start:stop]))
        start = stop

    return parts
def always_iterable(obj, base_type=(str, bytes)):
    """If *obj* is iterable, return an iterator over its items::

        >>> obj = (1, 2, 3)
        >>> list(always_iterable(obj))
        [1, 2, 3]

    If *obj* is not iterable, return a one-item iterable containing *obj*::

        >>> obj = 1
        >>> list(always_iterable(obj))
        [1]

    If *obj* is ``None``, return an empty iterable:

        >>> obj = None
        >>> list(always_iterable(None))
        []

    By default, binary and text strings are not considered iterable::

        >>> obj = 'foo'
        >>> list(always_iterable(obj))
        ['foo']

    If *base_type* is set, objects for which ``isinstance(obj, base_type)``
    returns ``True`` won't be considered iterable.

    >>> obj = {'a': 1}
    >>> list(always_iterable(obj))  # Iterate over the dict's keys
    ['a']
    >>> list(always_iterable(obj, base_type=dict))  # Treat dicts as a unit
    [{'a': 1}]

    Set *base_type* to ``None`` to avoid any special handling and treat objects
    Python considers iterable as iterable:

    >>> obj = 'foo'
    >>> list(always_iterable(obj, base_type=None))
    ['f', 'o', 'o']
    """
    if obj is None:
        return iter(())

    # Instances of base_type are treated as single units even if iterable.
    treat_as_unit = (base_type is not None) and isinstance(obj, base_type)
    if not treat_as_unit:
        try:
            return iter(obj)
        except TypeError:
            pass

    return iter((obj,))
def adjacent(predicate, iterable, distance=1):
    """Return an iterable over `(bool, item)` tuples where the `item` is
    drawn from *iterable* and the `bool` indicates whether
    that item satisfies the *predicate* or is adjacent to an item that does.

    For example, to find whether items are adjacent to a ``3``::

        >>> list(adjacent(lambda x: x == 3, range(6)))
        [(False, 0), (False, 1), (True, 2), (True, 3), (True, 4), (False, 5)]

    Set *distance* to change what counts as adjacent. For example, to find
    whether items are two places away from a ``3``:

        >>> list(adjacent(lambda x: x == 3, range(6), distance=2))
        [(False, 0), (True, 1), (True, 2), (True, 3), (True, 4), (True, 5)]

    This is useful for contextualizing the results of a search function.
    For example, a code comparison tool might want to identify lines that
    have changed, but also surrounding lines to give the viewer of the diff
    context.

    The predicate function will only be called once for each item in the
    iterable.

    See also :func:`groupby_transform`, which can be used with this function
    to group ranges of items with the same `bool` value.

    """
    # Allow distance=0 mainly for testing that it reproduces results with map()
    if distance < 0:
        raise ValueError('distance must be at least 0')

    flags_input, originals = tee(iterable)
    # Pad the flag stream on both ends so the sliding window is full-width
    # even at the boundaries of the iterable.
    pad = [False] * distance
    flags = chain(pad, map(predicate, flags_input), pad)
    windows = windowed(flags, 2 * distance + 1)
    return zip(map(any, windows), originals)
def groupby_transform(iterable, keyfunc=None, valuefunc=None, reducefunc=None):
    """An extension of :func:`itertools.groupby` that can apply transformations
    to the grouped data.

    * *keyfunc* is a function computing a key value for each item in *iterable*
    * *valuefunc* is a function that transforms the individual items from
      *iterable* after grouping
    * *reducefunc* is a function that transforms each group of items

    >>> iterable = 'aAAbBBcCC'
    >>> keyfunc = lambda k: k.upper()
    >>> valuefunc = lambda v: v.lower()
    >>> reducefunc = lambda g: ''.join(g)
    >>> list(groupby_transform(iterable, keyfunc, valuefunc, reducefunc))
    [('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')]

    Each optional argument defaults to an identity function if not specified.

    :func:`groupby_transform` is useful when grouping elements of an iterable
    using a separate iterable as the key. To do this, :func:`zip` the iterables
    and pass a *keyfunc* that extracts the first element and a *valuefunc*
    that extracts the second element::

        >>> from operator import itemgetter
        >>> keys = [0, 0, 1, 1, 1, 2, 2, 2, 3]
        >>> values = 'abcdefghi'
        >>> iterable = zip(keys, values)
        >>> grouper = groupby_transform(iterable, itemgetter(0), itemgetter(1))
        >>> [(k, ''.join(g)) for k, g in grouper]
        [(0, 'ab'), (1, 'cde'), (2, 'fgh'), (3, 'i')]

    Note that the order of items in the iterable is significant.
    Only adjacent items are grouped together, so if you don't want any
    duplicate groups, you should sort the iterable by the key function.

    """
    groups = groupby(iterable, keyfunc)
    # Wrap lazily: the value and reduce transforms are applied only as the
    # caller iterates over the result.
    if valuefunc:
        groups = ((key, map(valuefunc, grp)) for key, grp in groups)
    if reducefunc:
        groups = ((key, reducefunc(grp)) for key, grp in groups)
    return groups
class numeric_range(abc.Sequence, abc.Hashable):
    """An extension of the built-in ``range()`` function whose arguments can
    be any orderable numeric type.

    With only *stop* specified, *start* defaults to ``0`` and *step*
    defaults to ``1``. The output items will match the type of *stop*:

    >>> list(numeric_range(3.5))
    [0.0, 1.0, 2.0, 3.0]

    With only *start* and *stop* specified, *step* defaults to ``1``. The
    output items will match the type of *start*:

    >>> from decimal import Decimal
    >>> start = Decimal('2.1')
    >>> stop = Decimal('5.1')
    >>> list(numeric_range(start, stop))
    [Decimal('2.1'), Decimal('3.1'), Decimal('4.1')]

    With *start*, *stop*, and *step* specified the output items will match
    the type of ``start + step``:

    >>> from fractions import Fraction
    >>> start = Fraction(1, 2)  # Start at 1/2
    >>> stop = Fraction(5, 2)  # End at 5/2
    >>> step = Fraction(1, 2)  # Count by 1/2
    >>> list(numeric_range(start, stop, step))
    [Fraction(1, 2), Fraction(1, 1), Fraction(3, 2), Fraction(2, 1)]

    If *step* is zero, ``ValueError`` is raised. Negative steps are supported:

    >>> list(numeric_range(3, -1, -1.0))
    [3.0, 2.0, 1.0, 0.0]

    Be aware of the limitations of floating point numbers; the representation
    of the yielded numbers may be surprising.

    ``datetime.datetime`` objects can be used for *start* and *stop*, if *step*
    is a ``datetime.timedelta`` object:

    >>> import datetime
    >>> start = datetime.datetime(2019, 1, 1)
    >>> stop = datetime.datetime(2019, 1, 3)
    >>> step = datetime.timedelta(days=1)
    >>> items = iter(numeric_range(start, stop, step))
    >>> next(items)
    datetime.datetime(2019, 1, 1, 0, 0)
    >>> next(items)
    datetime.datetime(2019, 1, 2, 0, 0)

    """

    # Shared hash for every empty range, matching hash(range(0, 0)).
    _EMPTY_HASH = hash(range(0, 0))

    def __init__(self, *args):
        # Mirror range()'s call signatures: (stop), (start, stop), or
        # (start, stop, step).
        argc = len(args)
        if argc == 1:
            (self._stop,) = args
            # Construct zero/one through the argument types so arbitrary
            # numeric types (Decimal, Fraction, datetime/timedelta) work.
            self._start = type(self._stop)(0)
            self._step = type(self._stop - self._start)(1)
        elif argc == 2:
            self._start, self._stop = args
            self._step = type(self._stop - self._start)(1)
        elif argc == 3:
            self._start, self._stop, self._step = args
        elif argc == 0:
            raise TypeError(
                'numeric_range expected at least '
                '1 argument, got {}'.format(argc)
            )
        else:
            raise TypeError(
                'numeric_range expected at most '
                '3 arguments, got {}'.format(argc)
            )

        # A zero of the step's type, used for all comparisons below.
        self._zero = type(self._step)(0)
        if self._step == self._zero:
            raise ValueError('numeric_range() arg 3 must not be zero')
        self._growing = self._step > self._zero
        # Length is computed once here and cached in self._len.
        self._init_len()

    def __bool__(self):
        # True iff the range would yield at least one item.
        if self._growing:
            return self._start < self._stop
        else:
            return self._start > self._stop

    def __contains__(self, elem):
        # Membership: elem must lie in the half-open interval and be an
        # exact whole number of steps away from start.
        if self._growing:
            if self._start <= elem < self._stop:
                return (elem - self._start) % self._step == self._zero
        else:
            if self._start >= elem > self._stop:
                return (self._start - elem) % (-self._step) == self._zero

        return False

    def __eq__(self, other):
        if isinstance(other, numeric_range):
            empty_self = not bool(self)
            empty_other = not bool(other)
            if empty_self or empty_other:
                return empty_self and empty_other  # True if both empty
            else:
                # Equal iff they yield the same items: same start, same
                # step, and the same final item.
                return (
                    self._start == other._start
                    and self._step == other._step
                    and self._get_by_index(-1) == other._get_by_index(-1)
                )
        else:
            return False

    def __getitem__(self, key):
        if isinstance(key, int):
            return self._get_by_index(key)
        elif isinstance(key, slice):
            # Slicing returns a new numeric_range; the slice step scales
            # this range's step.
            step = self._step if key.step is None else key.step * self._step

            # Clamp the slice start into [start, stop] of this range.
            if key.start is None or key.start <= -self._len:
                start = self._start
            elif key.start >= self._len:
                start = self._stop
            else:  # -self._len < key.start < self._len
                start = self._get_by_index(key.start)

            # Clamp the slice stop likewise.
            if key.stop is None or key.stop >= self._len:
                stop = self._stop
            elif key.stop <= -self._len:
                stop = self._start
            else:  # -self._len < key.stop < self._len
                stop = self._get_by_index(key.stop)

            return numeric_range(start, stop, step)
        else:
            raise TypeError(
                'numeric range indices must be '
                'integers or slices, not {}'.format(type(key).__name__)
            )

    def __hash__(self):
        if self:
            # Hash on the identifying triple (start, last item, step),
            # consistent with __eq__ above.
            return hash((self._start, self._get_by_index(-1), self._step))
        else:
            return self._EMPTY_HASH

    def __iter__(self):
        values = (self._start + (n * self._step) for n in count())
        if self._growing:
            # partial(gt, stop)(value) evaluates "stop > value".
            return takewhile(partial(gt, self._stop), values)
        else:
            return takewhile(partial(lt, self._stop), values)

    def __len__(self):
        return self._len

    def _init_len(self):
        # Compute the number of items once, at construction time.
        if self._growing:
            start = self._start
            stop = self._stop
            step = self._step
        else:
            # Normalize a shrinking range into an equivalent growing one.
            start = self._stop
            stop = self._start
            step = -self._step
        distance = stop - start
        if distance <= self._zero:
            self._len = 0
        else:  # distance > 0 and step > 0: regular euclidean division
            q, r = divmod(distance, step)
            self._len = int(q) + int(r != self._zero)

    def __reduce__(self):
        # Pickle support: rebuild from the three constructor arguments.
        return numeric_range, (self._start, self._stop, self._step)

    def __repr__(self):
        if self._step == 1:
            return "numeric_range({}, {})".format(
                repr(self._start), repr(self._stop)
            )
        else:
            return "numeric_range({}, {}, {})".format(
                repr(self._start), repr(self._stop), repr(self._step)
            )

    def __reversed__(self):
        # The reversed range starts at the last item and steps backwards,
        # stopping just beyond the original start.
        return iter(
            numeric_range(
                self._get_by_index(-1), self._start - self._step, -self._step
            )
        )

    def count(self, value):
        # Every value appears at most once in a range.
        return int(value in self)

    def index(self, value):
        # Same interval/divisibility logic as __contains__, but returning
        # the item's position instead of a boolean.
        if self._growing:
            if self._start <= value < self._stop:
                q, r = divmod(value - self._start, self._step)
                if r == self._zero:
                    return int(q)
        else:
            if self._start >= value > self._stop:
                q, r = divmod(self._start - value, -self._step)
                if r == self._zero:
                    return int(q)

        raise ValueError("{} is not in numeric range".format(value))

    def _get_by_index(self, i):
        # Support negative indexing like built-in sequences.
        if i < 0:
            i += self._len
        if i < 0 or i >= self._len:
            raise IndexError("numeric range object index out of range")
        return self._start + i * self._step
def count_cycle(iterable, n=None):
    """Cycle through the items from *iterable* up to *n* times, yielding
    the number of completed cycles along with each item. If *n* is omitted the
    process repeats indefinitely.

    >>> list(count_cycle('AB', 3))
    [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')]

    """
    # Materialize the input once so it can be replayed every cycle.
    pool = tuple(iterable)
    if not pool:
        return iter(())
    rounds = count() if n is None else range(n)
    return ((cycle_number, item) for cycle_number in rounds for item in pool)
def mark_ends(iterable):
    """Yield 3-tuples of the form ``(is_first, is_last, item)``.

    >>> list(mark_ends('ABC'))
    [(True, False, 'A'), (False, False, 'B'), (False, True, 'C')]

    Use this when looping over an iterable to take special action on its first
    and/or last items:

    >>> iterable = ['Header', 100, 200, 'Footer']
    >>> total = 0
    >>> for is_first, is_last, item in mark_ends(iterable):
    ...     if is_first:
    ...         continue  # Skip the header
    ...     if is_last:
    ...         continue  # Skip the footer
    ...     total += item
    >>> print(total)
    300
    """
    it = iter(iterable)

    try:
        current = next(it)
    except StopIteration:
        # Empty input yields nothing.
        return

    # Hold one item back so "is_last" is known when an item is emitted.
    is_first = True
    for upcoming in it:
        yield is_first, False, current
        is_first = False
        current = upcoming

    yield is_first, True, current
def locate(iterable, pred=bool, window_size=None):
    """Yield the index of each item in *iterable* for which *pred* returns
    ``True``.

    *pred* defaults to :func:`bool`, which will select truthy items:

    >>> list(locate([0, 1, 1, 0, 1, 0, 0]))
    [1, 2, 4]

    Set *pred* to a custom function to, e.g., find the indexes for a particular
    item.

    >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b'))
    [1, 3]

    If *window_size* is given, then the *pred* function will be called with
    that many items. This enables searching for sub-sequences:

    >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
    >>> pred = lambda *args: args == (1, 2, 3)
    >>> list(locate(iterable, pred=pred, window_size=3))
    [1, 5, 9]

    Use with :func:`seekable` to find indexes and then retrieve the associated
    items:

    >>> from itertools import count
    >>> from more_itertools import seekable
    >>> source = (3 * n + 1 if (n % 2) else n // 2 for n in count())
    >>> it = seekable(source)
    >>> pred = lambda x: x > 100
    >>> indexes = locate(it, pred=pred)
    >>> i = next(indexes)
    >>> it.seek(i)
    >>> next(it)
    106

    """
    if window_size is None:
        # compress() keeps the counter values where pred was truthy.
        return compress(count(), map(pred, iterable))

    if window_size < 1:
        raise ValueError('window size must be at least 1')

    # Call pred with each full window's items as positional arguments.
    windows = windowed(iterable, window_size, fillvalue=_marker)
    return compress(count(), starmap(pred, windows))
def lstrip(iterable, pred):
    """Yield the items from *iterable*, but strip any from the beginning
    for which *pred* returns ``True``.

    For example, to remove a set of items from the start of an iterable:

    >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
    >>> pred = lambda x: x in {None, False, ''}
    >>> list(lstrip(iterable, pred))
    [1, 2, None, 3, False, None]

    This function is analogous to :func:`str.lstrip`, and is essentially
    a wrapper for :func:`itertools.dropwhile`.

    """
    # dropwhile discards leading items while pred holds, then yields the rest.
    return dropwhile(pred, iterable)
def rstrip(iterable, pred):
    """Yield the items from *iterable*, but strip any from the end
    for which *pred* returns ``True``.

    For example, to remove a set of items from the end of an iterable:

    >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
    >>> pred = lambda x: x in {None, False, ''}
    >>> list(rstrip(iterable, pred))
    [None, False, None, 1, 2, None, 3]

    This function is analogous to :func:`str.rstrip`.

    """
    # Buffer matching items; they are only emitted if a non-matching item
    # follows them, so a trailing run of matches is silently dropped.
    pending = []
    for item in iterable:
        if pred(item):
            pending.append(item)
        else:
            if pending:
                yield from pending
                pending.clear()
            yield item
def strip(iterable, pred):
    """Yield the items from *iterable*, trimming items for which *pred*
    returns ``True`` from both the beginning and the end.

    >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
    >>> pred = lambda x: x in {None, False, ''}
    >>> list(strip(iterable, pred))
    [1, 2, None, 3]

    This function is analogous to :func:`str.strip`.

    """
    # Trim the front first, then the back of what remains.
    trimmed_front = lstrip(iterable, pred)
    return rstrip(trimmed_front, pred)
class islice_extended:
    """An extension of :func:`itertools.islice` that supports negative values
    for *stop*, *start*, and *step*.

    >>> iterable = iter('abcdefgh')
    >>> list(islice_extended(iterable, -4, -1))
    ['e', 'f', 'g']

    Negative values require some caching of *iterable*, but the amount of
    memory used is kept as small as possible. A negative step even works
    with an infinite iterator:

    >>> from itertools import count
    >>> list(islice_extended(count(), 110, 99, -2))
    [110, 108, 106, 104, 102, 100]

    Slice notation is also supported directly:

    >>> iterable = map(str, count())
    >>> it = islice_extended(iterable)[10:20:2]
    >>> list(it)
    ['10', '12', '14', '16', '18']

    """

    def __init__(self, iterable, *args):
        it = iter(iterable)
        # With slice arguments, wrap the iterator; without, pass it through.
        self._iterable = _islice_helper(it, slice(*args)) if args else it

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._iterable)

    def __getitem__(self, key):
        if not isinstance(key, slice):
            raise TypeError('islice_extended.__getitem__ argument must be a slice')
        return islice_extended(_islice_helper(self._iterable, key))
def _islice_helper(it, s):
    """Yield the items of iterator *it* selected by slice *s*, supporting
    negative values for start, stop, and step (which plain
    :func:`itertools.islice` does not).

    Negative indices require buffering part of *it* in a deque; each branch
    below bounds that buffer as tightly as possible.
    """
    start = s.start
    stop = s.stop
    if s.step == 0:
        raise ValueError('step argument must be a non-zero integer or None.')
    step = s.step or 1

    if step > 0:
        start = 0 if (start is None) else start

        if start < 0:
            # Consume all but the last -start items; enumerate from 1 so
            # the last pair's index equals the total length consumed.
            cache = deque(enumerate(it, 1), maxlen=-start)
            len_iter = cache[-1][0] if cache else 0

            # Adjust start to be positive
            i = max(len_iter + start, 0)

            # Adjust stop to be positive
            if stop is None:
                j = len_iter
            elif stop >= 0:
                j = min(stop, len_iter)
            else:
                j = max(len_iter + stop, 0)

            # Slice the cache
            n = j - i
            if n <= 0:
                return

            for index, item in islice(cache, 0, n, step):
                yield item
        elif (stop is not None) and (stop < 0):
            # Advance to the start position
            next(islice(it, start, start), None)

            # When stop is negative, we have to carry -stop items while
            # iterating; an item is only yielded once we know it is not
            # within -stop of the end.
            cache = deque(islice(it, -stop), maxlen=-stop)

            for index, item in enumerate(it):
                cached_item = cache.popleft()
                if index % step == 0:
                    yield cached_item
                cache.append(item)
        else:
            # When both start and stop are positive we have the normal case
            yield from islice(it, start, stop, step)
    else:
        # Negative step: the output runs right-to-left.
        start = -1 if (start is None) else start

        if (stop is not None) and (stop < 0):
            # Consume all but the last items
            n = -stop - 1
            cache = deque(enumerate(it, 1), maxlen=n)
            len_iter = cache[-1][0] if cache else 0

            # If start and stop are both negative they are comparable and
            # we can just slice. Otherwise we can adjust start to be negative
            # and then slice.
            if start < 0:
                i, j = start, stop
            else:
                i, j = min(start - len_iter, -1), None

            for index, item in list(cache)[i:j:step]:
                yield item
        else:
            # Advance to the stop position
            if stop is not None:
                m = stop + 1
                next(islice(it, m, m), None)

            # stop is positive, so if start is negative they are not comparable
            # and we need the rest of the items.
            if start < 0:
                i = start
                n = None
            # stop is None and start is positive, so we just need items up to
            # the start index.
            elif stop is None:
                i = None
                n = start + 1
            # Both stop and start are positive, so they are comparable.
            else:
                i = None
                n = start - stop
                if n <= 0:
                    return

            cache = list(islice(it, n))

            yield from cache[i::step]
def always_reversible(iterable):
    """An extension of :func:`reversed` that works for all iterables, not
    just those implementing the ``Reversible`` or ``Sequence`` protocols.

    >>> print(*always_reversible(x for x in range(3)))
    2 1 0

    If *iterable* is already reversible the result of :func:`reversed` is
    returned directly. Otherwise the remaining items are cached in a list
    and yielded in reverse, which may require significant storage.
    """
    # EAFP: attempt the cheap path first, fall back to materializing.
    try:
        rev = reversed(iterable)
    except TypeError:
        rev = reversed(list(iterable))
    return rev
def consecutive_groups(iterable, ordering=lambda x: x):
    """Yield groups of consecutive items using :func:`itertools.groupby`.
    The *ordering* function determines whether two items are adjacent by
    returning their position.

    By default, the ordering function is the identity function. This is
    suitable for finding runs of numbers:

    >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40]
    >>> for group in consecutive_groups(iterable):
    ...     print(list(group))
    [1]
    [10, 11, 12]
    [20]
    [30, 31, 32, 33]
    [40]

    For finding runs of adjacent letters, try using the :meth:`index` method
    of a string of letters:

    >>> from string import ascii_lowercase
    >>> iterable = 'abcdfgilmnop'
    >>> ordering = ascii_lowercase.index
    >>> for group in consecutive_groups(iterable, ordering):
    ...     print(list(group))
    ['a', 'b', 'c', 'd']
    ['f', 'g']
    ['i']
    ['l', 'm', 'n', 'o', 'p']

    Each group of consecutive items is an iterator that shares its source
    with *iterable*. When an output group is advanced, the previous group
    is no longer available unless its elements are copied (e.g., into a
    ``list``).

    >>> iterable = [1, 2, 11, 12, 21, 22]
    >>> saved_groups = []
    >>> for group in consecutive_groups(iterable):
    ...     saved_groups.append(list(group))  # Copy group elements
    >>> saved_groups
    [[1, 2], [11, 12], [21, 22]]

    """
    # Items are consecutive exactly when (index - position) stays constant,
    # so group on that difference.
    for k, g in groupby(
        enumerate(iterable), key=lambda x: x[0] - ordering(x[1])
    ):
        yield map(itemgetter(1), g)
def difference(iterable, func=sub, *, initial=None):
    """Compute the first difference of *iterable* (with
    :func:`operator.sub` by default), undoing the effect of
    :func:`itertools.accumulate`:

    >>> list(difference([0, 1, 3, 6, 10]))
    [0, 1, 2, 3, 4]

    A custom *func* may be supplied; it is applied as
    ``A, func(B, A), func(C, B), ...``:

    >>> list(difference([1, 2, 6, 24, 120], lambda x, y: x // y))
    [1, 2, 3, 4, 5]

    If *initial* is set to something other than ``None``, the first element
    is skipped when computing successive differences, mirroring
    ``accumulate(..., initial=...)``:

    >>> list(difference([10, 11, 13, 16], initial=10))
    [1, 2, 3]

    """
    fork_a, fork_b = tee(iterable)
    try:
        head = [next(fork_b)]
    except StopIteration:  # the input was empty
        return iter([])

    # When *initial* was used with accumulate(), the first element is the
    # initial value itself; don't emit it.
    if initial is not None:
        head = []

    return chain(head, starmap(func, zip(fork_b, fork_a)))
class SequenceView(Sequence):
    """A read-only, dynamic view of the sequence object *target*.

    Analogous to Python's built-in "dictionary view" types: when the
    underlying sequence changes, so does the view.

    >>> seq = ['0', '1', '2']
    >>> view = SequenceView(seq)
    >>> view
    SequenceView(['0', '1', '2'])
    >>> seq.append('3')
    >>> view
    SequenceView(['0', '1', '2', '3'])

    Indexing, slicing, and length queries behave like the underlying
    sequence, but assignment is not allowed:

    >>> view[1]
    '1'
    >>> view[1:-1]
    ['1', '2']
    >>> len(view)
    4

    Useful as an alternative to copying, since (almost) no extra storage
    is required.

    """

    def __init__(self, target):
        if not isinstance(target, Sequence):
            raise TypeError
        self._target = target

    def __getitem__(self, index):
        return self._target[index]

    def __len__(self):
        return len(self._target)

    def __repr__(self):
        return f'{type(self).__name__}({self._target!r})'
class seekable:
    """Wrap an iterator to allow for seeking backward and forward. This
    progressively caches the items in the source iterable so they can be
    re-visited.

    Call :meth:`seek` with an index to seek to that position in the source
    iterable.

    To "reset" an iterator, seek to ``0``:

    >>> from itertools import count
    >>> it = seekable((str(n) for n in count()))
    >>> next(it), next(it), next(it)
    ('0', '1', '2')
    >>> it.seek(0)
    >>> next(it), next(it), next(it)
    ('0', '1', '2')
    >>> next(it)
    '3'

    You can also seek forward:

    >>> it = seekable((str(n) for n in range(20)))
    >>> it.seek(10)
    >>> next(it)
    '10'
    >>> it.seek(20)  # Seeking past the end of the source isn't a problem
    >>> list(it)
    []
    >>> it.seek(0)  # Resetting works even after hitting the end
    >>> next(it), next(it), next(it)
    ('0', '1', '2')

    Call :meth:`peek` to look ahead one item without advancing the iterator:

    >>> it = seekable('1234')
    >>> it.peek()
    '1'
    >>> list(it)
    ['1', '2', '3', '4']
    >>> it.peek(default='empty')
    'empty'

    Before the iterator is at its end, calling :func:`bool` on it will return
    ``True``. After it will return ``False``:

    >>> it = seekable('5678')
    >>> bool(it)
    True
    >>> list(it)
    ['5', '6', '7', '8']
    >>> bool(it)
    False

    You may view the contents of the cache with the :meth:`elements` method.
    That returns a :class:`SequenceView`, a view that updates automatically:

    >>> it = seekable((str(n) for n in range(10)))
    >>> next(it), next(it), next(it)
    ('0', '1', '2')
    >>> elements = it.elements()
    >>> elements
    SequenceView(['0', '1', '2'])
    >>> next(it)
    '3'
    >>> elements
    SequenceView(['0', '1', '2', '3'])

    By default, the cache grows as the source iterable progresses, so beware of
    wrapping very large or infinite iterables. Supply *maxlen* to limit the
    size of the cache (this of course limits how far back you can seek).

    >>> from itertools import count
    >>> it = seekable((str(n) for n in count()), maxlen=2)
    >>> next(it), next(it), next(it), next(it)
    ('0', '1', '2', '3')
    >>> list(it.elements())
    ['2', '3']
    >>> it.seek(0)
    >>> next(it), next(it), next(it), next(it)
    ('2', '3', '4', '5')
    >>> next(it)
    '6'

    """

    def __init__(self, iterable, maxlen=None):
        self._source = iter(iterable)
        # Unbounded list cache by default; bounded deque when maxlen given.
        if maxlen is None:
            self._cache = []
        else:
            self._cache = deque([], maxlen)
        # _index is None while reading from the source; it's an int while
        # replaying previously-cached items after a seek().
        self._index = None

    def __iter__(self):
        return self

    def __next__(self):
        # Replay from the cache first, if a seek() put us inside it.
        if self._index is not None:
            try:
                item = self._cache[self._index]
            except IndexError:
                # Ran off the end of the cache; resume the source below.
                self._index = None
            else:
                self._index += 1
                return item

        item = next(self._source)
        self._cache.append(item)
        return item

    def __bool__(self):
        # True iff at least one more item is available.
        try:
            self.peek()
        except StopIteration:
            return False
        return True

    def peek(self, default=_marker):
        # Pull the next item, then step the replay index back so the next
        # __next__ call returns the same item again.
        try:
            peeked = next(self)
        except StopIteration:
            if default is _marker:
                raise
            return default
        if self._index is None:
            self._index = len(self._cache)
        self._index -= 1
        return peeked

    def elements(self):
        # Live, read-only view of everything cached so far.
        return SequenceView(self._cache)

    def seek(self, index):
        self._index = index
        # If seeking beyond what's cached, pull the difference from the
        # source (stopping early if it runs out).
        remainder = index - len(self._cache)
        if remainder > 0:
            consume(self, remainder)
class run_length:
    """Namespace for run-length encoding and decoding.

    :func:`run_length.encode` compresses an iterable into (item, count)
    pairs, one per run of repeated items:

    >>> uncompressed = 'abbcccdddd'
    >>> list(run_length.encode(uncompressed))
    [('a', 1), ('b', 2), ('c', 3), ('d', 4)]

    :func:`run_length.decode` reverses the process, yielding the items of
    the decompressed iterable:

    >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
    >>> list(run_length.decode(compressed))
    ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd']

    """

    @staticmethod
    def encode(iterable):
        # groupby collects runs of equal adjacent items; count each run.
        return (
            (value, sum(1 for _ in run)) for value, run in groupby(iterable)
        )

    @staticmethod
    def decode(iterable):
        # Expand each (value, count) pair back into `count` copies.
        return chain.from_iterable(
            repeat(value, times) for value, times in iterable
        )
def exactly_n(iterable, n, predicate=bool):
    """Return ``True`` if exactly ``n`` items in the iterable are ``True``
    according to the *predicate* function.

    >>> exactly_n([True, True, False], 2)
    True
    >>> exactly_n([True, True, False], 1)
    False
    >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3)
    True

    The iterable is only advanced until ``n + 1`` truthy items are seen,
    so avoid calling this on infinite iterables.

    """
    # Count at most n + 1 matches; equality with n rules out both
    # "too few" and "at least one too many".
    matches = islice(filter(predicate, iterable), n + 1)
    return sum(1 for _ in matches) == n
def circular_shifts(iterable):
    """Return a list of circular shifts of *iterable*.

    >>> circular_shifts(range(4))
    [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)]
    """
    pool = list(iterable)
    # Each shift is a rotation of the pool by i positions.
    return [tuple(pool[i:] + pool[:i]) for i in range(len(pool))]
def make_decorator(wrapping_func, result_index=0):
    """Return a decorator version of *wrapping_func*, a function that
    modifies an iterable. *result_index* is the position in
    *wrapping_func*'s signature where the iterable goes.

    This lets you use itertools on the "production end," i.e. at function
    definition, augmenting what a function returns without changing its code.

    For example, to produce a decorator version of :func:`chunked`:

    >>> from more_itertools import chunked
    >>> chunker = make_decorator(chunked, result_index=0)
    >>> @chunker(3)
    ... def iter_range(n):
    ...     return iter(range(n))
    ...
    >>> list(iter_range(9))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8]]

    To only allow truthy items to be returned:

    >>> truth_serum = make_decorator(filter, result_index=1)
    >>> @truth_serum(bool)
    ... def boolean_test():
    ...     return [0, 1, '', ' ', False, True]
    ...
    >>> list(boolean_test())
    [1, ' ', True]

    The :func:`peekable` and :func:`seekable` wrappers make for practical
    decorators:

    >>> from more_itertools import peekable
    >>> peekable_function = make_decorator(peekable)
    >>> @peekable_function()
    ... def str_range(*args):
    ...     return (str(x) for x in range(*args))
    ...
    >>> it = str_range(1, 20, 2)
    >>> next(it), next(it), next(it)
    ('1', '3', '5')
    >>> it.peek()
    '7'
    >>> next(it)
    '7'

    """
    # See https://sites.google.com/site/bbayles/index/decorator_factory for
    # notes on how this works.
    def decorator(*wrapping_args, **wrapping_kwargs):
        def outer_wrapper(f):
            def inner_wrapper(*args, **kwargs):
                result = f(*args, **kwargs)
                # Splice the wrapped function's result into the argument
                # list at result_index.
                call_args = [
                    *wrapping_args[:result_index],
                    result,
                    *wrapping_args[result_index:],
                ]
                return wrapping_func(*call_args, **wrapping_kwargs)

            return inner_wrapper

        return outer_wrapper

    return decorator
def map_reduce(iterable, keyfunc, valuefunc=None, reducefunc=None):
    """Return a dictionary mapping the items in *iterable* to categories
    defined by *keyfunc*, transformed with *valuefunc*, and summarized by
    category with *reducefunc*.

    *valuefunc* defaults to the identity function. If *reducefunc* is
    unspecified, no summarization takes place:

    >>> keyfunc = lambda x: x.upper()
    >>> result = map_reduce('abbccc', keyfunc)
    >>> sorted(result.items())
    [('A', ['a']), ('B', ['b', 'b']), ('C', ['c', 'c', 'c'])]

    Specifying *valuefunc* transforms the categorized items:

    >>> result = map_reduce('abbccc', keyfunc, lambda x: 1)
    >>> sorted(result.items())
    [('A', [1]), ('B', [1, 1]), ('C', [1, 1, 1])]

    Specifying *reducefunc* summarizes the categorized items:

    >>> result = map_reduce('abbccc', keyfunc, lambda x: 1, sum)
    >>> sorted(result.items())
    [('A', 1), ('B', 2), ('C', 3)]

    Note that all items are gathered into lists before the summarization
    step, which may require significant storage.

    The returned object is a :obj:`collections.defaultdict` with its
    ``default_factory`` set to ``None``, so it behaves like a normal
    dictionary.

    """
    if valuefunc is None:
        valuefunc = lambda x: x

    ret = defaultdict(list)
    for item in iterable:
        ret[keyfunc(item)].append(valuefunc(item))

    if reducefunc is not None:
        # Replace each category's list with its summary in place.
        for key in ret:
            ret[key] = reducefunc(ret[key])

    # Disable the default factory so missing keys raise KeyError.
    ret.default_factory = None
    return ret
def rlocate(iterable, pred=bool, window_size=None):
    """Yield the index of each item in *iterable* for which *pred* returns
    ``True``, starting from the right and moving left.

    *pred* defaults to :func:`bool`, which will select truthy items:

    >>> list(rlocate([0, 1, 1, 0, 1, 0, 0]))  # Truthy at 1, 2, and 4
    [4, 2, 1]

    Set *pred* to a custom function to, e.g., find the indexes for a particular
    item:

    >>> iterable = iter('abcb')
    >>> pred = lambda x: x == 'b'
    >>> list(rlocate(iterable, pred))
    [3, 1]

    If *window_size* is given, then the *pred* function will be called with
    that many items. This enables searching for sub-sequences:

    >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
    >>> pred = lambda *args: args == (1, 2, 3)
    >>> list(rlocate(iterable, pred=pred, window_size=3))
    [9, 5, 1]

    Beware, this function won't return anything for infinite iterables.
    If *iterable* is reversible, ``rlocate`` will reverse it and search from
    the right. Otherwise, it will search from the left and return the results
    in reverse order.

    See :func:`locate` for other example applications.

    """
    if window_size is None:
        # Sized-and-reversible fast path: search the reversed sequence
        # lazily and translate indexes. Note that reversed(iterable) is
        # evaluated here, inside the try block, when the generator
        # expression is constructed -- a sized but non-reversible object
        # raises TypeError and falls through to the eager path below.
        try:
            len_iter = len(iterable)
            return (len_iter - i - 1 for i in locate(reversed(iterable), pred))
        except TypeError:
            pass

    # Eager fallback: locate from the left, then reverse the results.
    # This consumes the entire iterable.
    return reversed(list(locate(iterable, pred, window_size)))
def replace(iterable, pred, substitutes, count=None, window_size=1):
    """Yield the items from *iterable*, replacing the items for which *pred*
    returns ``True`` with the items from the iterable *substitutes*.

    >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1]
    >>> pred = lambda x: x == 0
    >>> substitutes = (2, 3)
    >>> list(replace(iterable, pred, substitutes))
    [1, 1, 2, 3, 1, 1, 2, 3, 1, 1]

    If *count* is given, the number of replacements will be limited:

    >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1, 0]
    >>> pred = lambda x: x == 0
    >>> substitutes = [None]
    >>> list(replace(iterable, pred, substitutes, count=2))
    [1, 1, None, 1, 1, None, 1, 1, 0]

    Use *window_size* to control the number of items passed as arguments to
    *pred*. This allows for locating and replacing subsequences.

    >>> iterable = [0, 1, 2, 5, 0, 1, 2, 5]
    >>> window_size = 3
    >>> pred = lambda *args: args == (0, 1, 2)  # 3 items passed to pred
    >>> substitutes = [3, 4]  # Splice in these items
    >>> list(replace(iterable, pred, substitutes, window_size=window_size))
    [3, 4, 5, 3, 4, 5]

    """
    if window_size < 1:
        raise ValueError('window_size must be at least 1')

    # Save the substitutes iterable, since it's used more than once
    substitutes = tuple(substitutes)

    # Add padding such that the number of windows matches the length of the
    # iterable (_marker padding is filtered back out before yielding).
    it = chain(iterable, [_marker] * (window_size - 1))
    windows = windowed(it, window_size)

    n = 0  # number of replacements made so far
    for w in windows:
        # If the current window matches our predicate (and we haven't hit
        # our maximum number of replacements), splice in the substitutes
        # and then consume the following windows that overlap with this one.
        # For example, if the iterable is (0, 1, 2, 3, 4...)
        # and the window size is 2, we have (0, 1), (1, 2), (2, 3)...
        # If the predicate matches on (0, 1), we need to zap (0, 1) and (1, 2)
        if pred(*w):
            if (count is None) or (n < count):
                n += 1
                yield from substitutes
                consume(windows, window_size - 1)
                continue

        # If there was no match (or we've reached the replacement limit),
        # yield the first item from the window.
        if w and (w[0] is not _marker):
            yield w[0]
def partitions(iterable):
    """Yield all possible order-preserving partitions of *iterable*.

    >>> iterable = 'abc'
    >>> for part in partitions(iterable):
    ...     print([''.join(p) for p in part])
    ['abc']
    ['a', 'bc']
    ['ab', 'c']
    ['a', 'b', 'c']

    This is unrelated to :func:`partition`.

    """
    seq = list(iterable)
    size = len(seq)
    # Every subset of interior positions defines a set of cut points.
    for cut_points in powerset(range(1, size)):
        boundaries = (0,) + cut_points + (size,)
        yield [seq[i:j] for i, j in zip(boundaries, boundaries[1:])]
def set_partitions(iterable, k=None):
    """
    Yield the set partitions of *iterable* into *k* parts. Set partitions are
    not order-preserving.

    >>> iterable = 'abc'
    >>> for part in set_partitions(iterable, 2):
    ...     print([''.join(p) for p in part])
    ['a', 'bc']
    ['ab', 'c']
    ['b', 'ac']


    If *k* is not given, every set partition is generated.

    >>> iterable = 'abc'
    >>> for part in set_partitions(iterable):
    ...     print([''.join(p) for p in part])
    ['abc']
    ['a', 'bc']
    ['ab', 'c']
    ['b', 'ac']
    ['a', 'b', 'c']

    """
    items = list(iterable)
    n = len(items)
    if k is not None:
        if k < 1:
            raise ValueError(
                "Can't partition in a negative or zero number of groups"
            )
        elif k > n:
            # More groups than items: no partitions exist.
            return

    def helper(lst, parts):
        # Recursively partition lst into exactly `parts` groups.
        if parts == 1:
            yield [lst]
        elif len(lst) == parts:
            yield [[item] for item in lst]
        else:
            head, *rest = lst
            # Either head forms its own group...
            for partition in helper(rest, parts - 1):
                yield [[head], *partition]
            # ...or head joins one of the groups of a smaller partition.
            for partition in helper(rest, parts):
                for i in range(len(partition)):
                    yield partition[:i] + [[head] + partition[i]] + partition[i + 1 :]

    if k is None:
        for size in range(1, n + 1):
            yield from helper(items, size)
    else:
        yield from helper(items, k)
class time_limited:
    """
    Yield items from *iterable* until *limit_seconds* have passed.
    If the time limit expires before all items have been yielded, the
    ``timed_out`` attribute will be set to ``True``.

    >>> from time import sleep
    >>> def generator():
    ...     yield 1
    ...     yield 2
    ...     sleep(0.2)
    ...     yield 3
    >>> iterable = time_limited(0.1, generator())
    >>> list(iterable)
    [1, 2]
    >>> iterable.timed_out
    True

    Note that the time is checked after each item is retrieved, and iteration
    stops if the time elapsed is greater than *limit_seconds*. If your time
    limit is 1 second, but it takes 2 seconds to generate the first item from
    the iterable, the function will run for 2 seconds and not yield anything.

    """

    def __init__(self, limit_seconds, iterable):
        # Zero is allowed (check is strictly less than zero), so the
        # message says "non-negative" rather than "positive".
        if limit_seconds < 0:
            raise ValueError('limit_seconds must be non-negative')
        self.limit_seconds = limit_seconds
        self._iterable = iter(iterable)
        self._start_time = monotonic()
        self.timed_out = False

    def __iter__(self):
        return self

    def __next__(self):
        item = next(self._iterable)
        # Check the clock after pulling the item; discard it and stop if
        # the budget is exhausted.
        if monotonic() - self._start_time > self.limit_seconds:
            self.timed_out = True
            raise StopIteration

        return item
def only(iterable, default=None, too_long=None):
    """If *iterable* has only one item, return it.
    If it has zero items, return *default*.
    If it has more than one item, raise the exception given by *too_long*,
    which is ``ValueError`` by default.

    >>> only([], default='missing')
    'missing'
    >>> only([1])
    1
    >>> only([1, 2])  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    ValueError: Expected exactly one item in iterable, but got 1, 2,
     and perhaps more.'
    >>> only([1, 2], too_long=TypeError)  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    TypeError

    Note that :func:`only` attempts to advance *iterable* twice to ensure there
    is only one item. See :func:`spy` or :func:`peekable` to check
    iterable contents less destructively.
    """
    it = iter(iterable)
    first_value = next(it, default)

    # A second item means the input was too long.
    try:
        second_value = next(it)
    except StopIteration:
        return first_value

    msg = (
        'Expected exactly one item in iterable, but got {!r}, {!r}, '
        'and perhaps more.'.format(first_value, second_value)
    )
    raise too_long or ValueError(msg)
def ichunked(iterable, n):
    """Break *iterable* into sub-iterables with *n* elements each.
    Like :func:`chunked`, but yields iterables instead of lists.

    If the sub-iterables are read in order, the elements of *iterable*
    won't be stored in memory. If they are read out of order,
    :func:`itertools.tee` is used to cache elements as necessary.

    >>> from itertools import count
    >>> all_chunks = ichunked(count(), 4)
    >>> c_1, c_2, c_3 = next(all_chunks), next(all_chunks), next(all_chunks)
    >>> list(c_2)  # c_1's elements have been cached; c_3's haven't been
    [4, 5, 6, 7]
    >>> list(c_1)
    [0, 1, 2, 3]
    >>> list(c_3)
    [8, 9, 10, 11]

    """
    source = iter(iterable)

    while True:
        # Peek one item to detect whether the source is exhausted.
        head = next(source, _marker)
        if head is _marker:
            return

        # Clone the source (with the peeked item restored) and hand out an
        # n-length slice of the clone.
        source, clone = tee(chain([head], source))
        yield islice(clone, n)

        # Skip the chunk's items in our own copy of the source.
        consume(source, n)
def distinct_combinations(iterable, r):
    """Yield the distinct combinations of *r* items taken from *iterable*.

    >>> list(distinct_combinations([0, 0, 1], 2))
    [(0, 0), (0, 1)]

    Equivalent to ``set(combinations(iterable))``, except duplicates are not
    generated and thrown away. For larger input sequences this is much more
    efficient.

    """
    if r < 0:
        raise ValueError('r must be non-negative')
    elif r == 0:
        yield ()
        return
    pool = tuple(iterable)
    # Explicit backtracking stack: one generator per position in the
    # combination being built. Each generator yields (index, item) pairs
    # with duplicate items filtered out, which is what prevents duplicate
    # combinations from ever being produced.
    generators = [unique_everseen(enumerate(pool), key=itemgetter(1))]
    current_combo = [None] * r
    level = 0
    while generators:
        try:
            cur_idx, p = next(generators[-1])
        except StopIteration:
            # Current position is exhausted; backtrack to the previous one.
            generators.pop()
            level -= 1
            continue
        current_combo[level] = p
        if level + 1 == r:
            yield tuple(current_combo)
        else:
            # Descend: candidates for the next position come strictly
            # after cur_idx, keeping combinations in pool order.
            generators.append(
                unique_everseen(
                    enumerate(pool[cur_idx + 1 :], cur_idx + 1),
                    key=itemgetter(1),
                )
            )
            level += 1
def filter_except(validator, iterable, *exceptions):
    """Yield the items from *iterable* for which the *validator* function
    does not raise one of the specified *exceptions*.

    *validator* is called once per item. It should accept one argument and
    raise an exception if the item is not valid.

    >>> iterable = ['1', '2', 'three', '4', None]
    >>> list(filter_except(int, iterable, ValueError, TypeError))
    ['1', '2', '4']

    An exception not listed in *exceptions* propagates normally.
    """
    for item in iterable:
        try:
            validator(item)
        except exceptions:
            # Item failed validation with an expected error; skip it.
            continue
        yield item
def map_except(function, iterable, *exceptions):
    """Transform each item from *iterable* with *function* and yield the
    result, unless *function* raises one of the specified *exceptions*.

    *function* is called to transform each item in *iterable*.
    It should accept one argument.

    >>> iterable = ['1', '2', 'three', '4', None]
    >>> list(map_except(int, iterable, ValueError, TypeError))
    [1, 2, 4]

    If an exception other than one given by *exceptions* is raised by
    *function*, it is raised like normal.
    """
    for item in iterable:
        try:
            yield function(item)
        except exceptions:
            # Expected failure for this item; drop it and continue.
            pass
def _sample_unweighted(iterable, k):
    """Reservoir-sample *k* items from *iterable* uniformly at random.

    Implementation of "Algorithm L" from the 1994 paper by Kim-Hung Li:
    "Reservoir-Sampling Algorithms of Time Complexity O(n(1+log(N/n)))".
    Rather than flipping a coin per element, it computes how many elements
    to skip between reservoir replacements.
    """
    # Fill up the reservoir (collection of samples) with the first `k` samples
    reservoir = take(k, iterable)

    # Generate random number that's the largest in a sample of k U(0,1) numbers
    # Largest order statistic: https://en.wikipedia.org/wiki/Order_statistic
    W = exp(log(random()) / k)

    # The number of elements to skip before changing the reservoir is a random
    # number with a geometric distribution. Sample it using random() and logs.
    next_index = k + floor(log(random()) / log(1 - W))

    for index, element in enumerate(iterable, k):
        if index == next_index:
            # Replace a uniformly-chosen reservoir slot with this element.
            reservoir[randrange(k)] = element
            # The new W is the largest in a sample of k U(0, `old_W`) numbers
            W *= exp(log(random()) / k)
            next_index += floor(log(random()) / log(1 - W)) + 1

    return reservoir
def _sample_weighted(iterable, k, weights):
    """Reservoir-sample *k* items from *iterable* with probabilities
    proportional to *weights*.

    Implementation of "A-ExpJ" from the 2006 paper by Efraimidis et al.:
    "Weighted random sampling with a reservoir". The reservoir is a
    min-heap keyed by each element's (log-space) weight-key; the element
    with the smallest key is the next candidate for eviction.
    """
    # Log-transform for numerical stability for weights that are small/large
    weight_keys = (log(random()) / weight for weight in weights)

    # Fill up the reservoir (collection of samples) with the first `k`
    # weight-keys and elements, then heapify the list.
    reservoir = take(k, zip(weight_keys, iterable))
    heapify(reservoir)

    # The number of jumps before changing the reservoir is a random variable
    # with an exponential distribution. Sample it using random() and logs.
    smallest_weight_key, _ = reservoir[0]
    weights_to_skip = log(random()) / smallest_weight_key

    for weight, element in zip(weights, iterable):
        if weight >= weights_to_skip:
            # The notation here is consistent with the paper, but we store
            # the weight-keys in log-space for better numerical stability.
            smallest_weight_key, _ = reservoir[0]
            t_w = exp(weight * smallest_weight_key)
            r_2 = uniform(t_w, 1)  # generate U(t_w, 1)
            weight_key = log(r_2) / weight
            # Evict the current minimum and insert the new element.
            heapreplace(reservoir, (weight_key, element))
            smallest_weight_key, _ = reservoir[0]
            weights_to_skip = log(random()) / smallest_weight_key
        else:
            weights_to_skip -= weight

    # Equivalent to [element for weight_key, element in sorted(reservoir)]
    return [heappop(reservoir)[1] for _ in range(k)]
def sample(iterable, k, weights=None):
    """Return a *k*-length list of elements chosen (without replacement)
    from the *iterable*. Like :func:`random.sample`, but works on iterables
    of unknown length.

    >>> iterable = range(100)
    >>> sample(iterable, 5)  # doctest: +SKIP
    [81, 60, 96, 16, 4]

    An iterable with *weights* may also be given:

    >>> iterable = range(100)
    >>> weights = (i * i + 1 for i in range(100))
    >>> sampled = sample(iterable, 5, weights=weights)  # doctest: +SKIP
    [79, 67, 74, 66, 78]

    The algorithm can also be used to generate weighted random permutations.
    The relative weight of each item determines the probability that it
    appears late in the permutation.

    >>> data = "abcdefgh"
    >>> weights = range(1, len(data) + 1)
    >>> sample(data, k=len(data), weights=weights)  # doctest: +SKIP
    ['c', 'a', 'b', 'e', 'g', 'd', 'h', 'f']
    """
    # Asking for nothing yields nothing, regardless of the input.
    if k == 0:
        return []

    iterable = iter(iterable)
    # Dispatch to the reservoir-sampling variant that matches the call.
    if weights is None:
        return _sample_unweighted(iterable, k)
    weights = iter(weights)
    return _sample_weighted(iterable, k, weights)
3375def is_sorted(iterable, key=None, reverse=False):
3376 """Returns ``True`` if the items of iterable are in sorted order, and
3377 ``False`` otherwise. *key* and *reverse* have the same meaning that they do
3378 in the built-in :func:`sorted` function.
3380 >>> is_sorted(['1', '2', '3', '4', '5'], key=int)
3381 True
3382 >>> is_sorted([5, 4, 3, 1, 2], reverse=True)
3383 False
3385 The function returns ``False`` after encountering the first out-of-order
3386 item. If there are no out-of-order items, the iterable is exhausted.
3387 """
3389 compare = lt if reverse else gt
3390 it = iterable if (key is None) else map(key, iterable)
3391 return not any(starmap(compare, pairwise(it)))
class AbortThread(BaseException):
    """Raised inside the worker thread by :class:`callback_iter` to stop it.

    Subclasses ``BaseException`` rather than ``Exception`` — presumably so
    that broad ``except Exception`` handlers in the wrapped function don't
    swallow it; confirm against callers before relying on this.
    """

    pass
class callback_iter:
    """Convert a function that uses callbacks to an iterator.

    Let *func* be a function that takes a `callback` keyword argument.
    For example:

    >>> def func(callback=None):
    ...     for i, c in [(1, 'a'), (2, 'b'), (3, 'c')]:
    ...         if callback:
    ...             callback(i, c)
    ...     return 4


    Use ``with callback_iter(func)`` to get an iterator over the parameters
    that are delivered to the callback.

    >>> with callback_iter(func) as it:
    ...     for args, kwargs in it:
    ...         print(args)
    (1, 'a')
    (2, 'b')
    (3, 'c')

    The function will be called in a background thread. The ``done`` property
    indicates whether it has completed execution.

    >>> it.done
    True

    If it completes successfully, its return value will be available
    in the ``result`` property.

    >>> it.result
    4

    Notes:

    * If the function uses some keyword argument besides ``callback``, supply
      *callback_kwd*.
    * If it finished executing, but raised an exception, accessing the
      ``result`` property will raise the same exception.
    * If it hasn't finished executing, accessing the ``result``
      property from within the ``with`` block will raise ``RuntimeError``.
    * If it hasn't finished executing, accessing the ``result`` property from
      outside the ``with`` block will raise a
      ``more_itertools.AbortThread`` exception.
    * Provide *wait_seconds* to adjust how frequently it is polled for
      output.

    """

    def __init__(self, func, callback_kwd='callback', wait_seconds=0.1):
        self._func = func                    # function to run in the worker
        self._callback_kwd = callback_kwd    # keyword used to pass the callback
        self._aborted = False                # set by __exit__ to stop the worker
        self._future = None                  # created lazily in _reader()
        self._wait_seconds = wait_seconds    # queue polling interval
        # One worker is enough: *func* is submitted exactly once.
        self._executor = ThreadPoolExecutor(max_workers=1)
        self._iterator = self._reader()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Make the next callback invocation raise AbortThread, then wait
        # for the worker thread to wind down.
        self._aborted = True
        self._executor.shutdown()

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._iterator)

    @property
    def done(self):
        # False until iteration has started (which submits the future)
        # and the submitted function has finished.
        if self._future is None:
            return False
        return self._future.done()

    @property
    def result(self):
        if not self.done:
            raise RuntimeError('Function has not yet completed')

        # Re-raises the function's exception, if it raised one.
        return self._future.result()

    def _reader(self):
        q = Queue()

        def callback(*args, **kwargs):
            # Runs in the worker thread each time *func* invokes the
            # callback; AbortThread unwinds the worker after __exit__.
            if self._aborted:
                raise AbortThread('canceled by user')

            q.put((args, kwargs))

        self._future = self._executor.submit(
            self._func, **{self._callback_kwd: callback}
        )

        # Poll the queue until the function finishes, yielding each
        # (args, kwargs) pair the callback delivered.
        while True:
            try:
                item = q.get(timeout=self._wait_seconds)
            except Empty:
                pass
            else:
                q.task_done()
                yield item

            if self._future.done():
                break

        # Drain items enqueued between the last poll and completion.
        remaining = []
        while True:
            try:
                item = q.get_nowait()
            except Empty:
                break
            else:
                q.task_done()
                remaining.append(item)
        q.join()
        yield from remaining
def windowed_complete(iterable, n):
    """
    Yield ``(beginning, middle, end)`` tuples, where:

    * Each ``middle`` has *n* items from *iterable*
    * Each ``beginning`` has the items before the ones in ``middle``
    * Each ``end`` has the items after the ones in ``middle``

    >>> iterable = range(7)
    >>> n = 3
    >>> for beginning, middle, end in windowed_complete(iterable, n):
    ...     print(beginning, middle, end)
    () (0, 1, 2) (3, 4, 5, 6)
    (0,) (1, 2, 3) (4, 5, 6)
    (0, 1) (2, 3, 4) (5, 6)
    (0, 1, 2) (3, 4, 5) (6,)
    (0, 1, 2, 3) (4, 5, 6) ()

    Note that *n* must be at least 0 and at most equal to the length of
    *iterable*.

    This function will exhaust the iterable and may require significant
    storage.
    """
    if n < 0:
        raise ValueError('n must be >= 0')

    # Materialize once so we can slice from both ends.
    seq = tuple(iterable)
    size = len(seq)

    if n > size:
        raise ValueError('n must be <= len(seq)')

    # Slide the n-wide middle window across all valid positions.
    for split in range(size - n + 1):
        yield seq[:split], seq[split : split + n], seq[split + n :]
def all_unique(iterable, key=None):
    """
    Returns ``True`` if all the elements of *iterable* are unique (no two
    elements are equal).

    >>> all_unique('ABCB')
    False

    If a *key* function is specified, it will be used to make comparisons.

    >>> all_unique('ABCb')
    True
    >>> all_unique('ABCb', str.lower)
    False

    The function returns as soon as the first non-unique element is
    encountered. Iterables with a mix of hashable and unhashable items can
    be used, but the function will be slower for unhashable items.
    """
    # Hashable items go in a set (fast); unhashable ones fall back to a
    # list scan when the set membership test raises TypeError.
    hashable_seen = set()
    unhashable_seen = []
    for item in map(key, iterable) if key else iterable:
        try:
            if item in hashable_seen:
                return False
            hashable_seen.add(item)
        except TypeError:
            if item in unhashable_seen:
                return False
            unhashable_seen.append(item)
    return True
def nth_product(index, *args):
    """Equivalent to ``list(product(*args))[index]``.

    The products of *args* can be ordered lexicographically.
    :func:`nth_product` computes the product at sort position *index* without
    computing the previous products.

    >>> nth_product(8, range(2), range(2), range(2), range(2))
    (1, 0, 0, 0)

    ``IndexError`` will be raised if the given *index* is invalid.
    """
    # Work from the last (fastest-varying) pool outward.
    pools = [tuple(pool) for pool in reversed(args)]
    sizes = [len(pool) for pool in pools]

    total = reduce(mul, sizes)

    # Support negative indexing from the end, then validate the position.
    if index < 0:
        index += total
    if not 0 <= index < total:
        raise IndexError

    # Peel off one mixed-radix digit per pool.
    selected = []
    for pool, size in zip(pools, sizes):
        index, digit = divmod(index, size)
        selected.append(pool[digit])

    return tuple(reversed(selected))
def nth_permutation(iterable, r, index):
    """Equivalent to ``list(permutations(iterable, r))[index]```

    The subsequences of *iterable* that are of length *r* where order is
    important can be ordered lexicographically. :func:`nth_permutation`
    computes the subsequence at sort position *index* directly, without
    computing the previous subsequences.

    >>> nth_permutation('ghijk', 2, 5)
    ('h', 'i')

    ``ValueError`` will be raised If *r* is negative or greater than the length
    of *iterable*.
    ``IndexError`` will be raised if the given *index* is invalid.
    """
    pool = list(iterable)
    n = len(pool)

    # `total` is the number of r-length permutations of n items.
    if r is None or r == n:
        r, total = n, factorial(n)
    elif not 0 <= r < n:
        raise ValueError
    else:
        total = factorial(n) // factorial(n - r)

    # Support negative indexing from the end, then validate.
    if index < 0:
        index += total
    if not 0 <= index < total:
        raise IndexError

    if total == 0:
        return tuple()

    # Express the permutation's rank in the factorial number system.
    digits = [0] * r
    quotient = index * factorial(n) // total if r < n else index
    for radix in range(1, n + 1):
        quotient, digit = divmod(quotient, radix)
        if 0 <= n - radix < r:
            digits[n - radix] = digit
        if quotient == 0:
            break

    # Each factoradic digit selects (and removes) one item from the pool.
    return tuple(map(pool.pop, digits))
def value_chain(*args):
    """Yield all arguments passed to the function in the same order in which
    they were passed. If an argument itself is iterable then iterate over its
    values.

    >>> list(value_chain(1, 2, 3, [4, 5, 6]))
    [1, 2, 3, 4, 5, 6]

    Binary and text strings are not considered iterable and are emitted
    as-is:

    >>> list(value_chain('12', '34', ['56', '78']))
    ['12', '34', '56', '78']

    Multiple levels of nesting are not flattened.
    """
    for arg in args:
        # Strings and bytes are treated as atoms despite being iterable.
        if isinstance(arg, (str, bytes)):
            yield arg
        else:
            try:
                yield from arg
            except TypeError:
                # Not iterable at all: emit the value itself.
                yield arg
def product_index(element, *args):
    """Equivalent to ``list(product(*args)).index(element)``

    The products of *args* can be ordered lexicographically.
    :func:`product_index` computes the first index of *element* without
    computing the previous products.

    >>> product_index([8, 2], range(10), range(5))
    42

    ``ValueError`` will be raised if the given *element* isn't in the product
    of *args*.
    """
    index = 0

    # zip_longest with the sentinel detects a length mismatch between
    # *element* and *args* in either direction.
    for value, pool in zip_longest(element, args, fillvalue=_marker):
        if value is _marker or pool is _marker:
            raise ValueError('element is not a product of args')

        # Accumulate the rank one mixed-radix digit at a time.
        pool = tuple(pool)
        index = index * len(pool) + pool.index(value)

    return index
def combination_index(element, iterable):
    """Equivalent to ``list(combinations(iterable, r)).index(element)``

    The subsequences of *iterable* that are of length *r* can be ordered
    lexicographically. :func:`combination_index` computes the index of the
    first *element*, without computing the previous combinations.

    >>> combination_index('adf', 'abcdefg')
    10

    ``ValueError`` will be raised if the given *element* isn't one of the
    combinations of *iterable*.
    """
    # Walk *element* and *iterable* in lockstep, recording the position in
    # *iterable* of each matched item of *element*.
    element = enumerate(element)
    k, y = next(element, (None, None))
    # An empty *element* is the (single) zero-length combination, index 0.
    if k is None:
        return 0

    indexes = []
    pool = enumerate(iterable)
    for n, x in pool:
        if x == y:
            indexes.append(n)
            tmp, y = next(element, (None, None))
            if tmp is None:
                # All items of *element* matched; k keeps the zero-based
                # position of its final item.
                break
            else:
                k = tmp
    else:
        # *iterable* ran out before every item of *element* was matched.
        raise ValueError('element is not a combination of iterable')

    # Exhaust the pool so n becomes the index of the last item of *iterable*.
    n, _ = last(pool, default=(n, None))

    # Python versions below 3.8 don't have math.comb
    index = 1
    for i, j in enumerate(reversed(indexes), start=1):
        j = n - j
        if i <= j:
            index += factorial(j) // (factorial(i) * factorial(j - i))

    return factorial(n + 1) // (factorial(k + 1) * factorial(n - k)) - index
def permutation_index(element, iterable):
    """Equivalent to ``list(permutations(iterable, r)).index(element)```

    The subsequences of *iterable* that are of length *r* where order is
    important can be ordered lexicographically. :func:`permutation_index`
    computes the index of the first *element* directly, without computing
    the previous permutations.

    >>> permutation_index([1, 3, 2], range(5))
    19

    ``ValueError`` will be raised if the given *element* isn't one of the
    permutations of *iterable*.
    """
    index = 0
    pool = list(iterable)
    # Lehmer-code style ranking: for each item of *element*, its position
    # among the not-yet-used pool items is one mixed-radix digit, with the
    # radix shrinking as the pool is consumed.
    for radix, value in zip(range(len(pool), -1, -1), element):
        position = pool.index(value)
        index = index * radix + position
        del pool[position]

    return index