Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/PyPDF2/pagerange.py : 43%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
2"""
3Representation and utils for ranges of PDF file pages.
5Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
6All rights reserved. This software is available under a BSD license;
7see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE
8"""
10import re
11from .utils import isString
# A decimal integer.  A leading zero and "-0" are both rejected.
_INT_RE = r"(0|-?[1-9]\d*)"

# A whole page-range expression: either a lone int, or up to three optional
# ints separated by one or two colons.  Capture groups, numbered by opening
# parenthesis from the left:
#   1  the whole expression
#   2  the lone-int form
#   3  the colon form          4  start
#   5  the ":stop..." tail     6  stop
#   7  the ":step" tail        8  step
PAGE_RANGE_RE = "^({0}|({0}?(:{0}?(:{0}?)?)))$".format(_INT_RE)
class ParseError(Exception):
    """Raised when a value cannot be parsed as a page range expression."""
# Human-readable summary of the page-range syntax.  It is substituted into
# the PageRange.__init__ docstring via str.format, so it must not contain
# literal curly braces.  The examples form a two-column table: each column
# is a pattern followed by its meaning, and the columns are kept aligned so
# the pairs can be told apart.
PAGE_RANGE_HELP = """Remember, page indices start with zero.
        Page range expression examples:
            :     all pages.                   -1    last page.
            22    just the 23rd page.          :-1   all but the last page.
            0:3   the first three pages.       -2    second-to-last page.
            :3    the first three pages.       -2:   last two pages.
            5:    from the sixth page onward.  -3:-1 third & second to last.
        The third, "stride" or "step" number is also recognized.
            ::2       0 2 4 ... to the end.    3:0:-1    3 2 1 but not 0.
            1:10:2    1 3 5 7 9                2::-1     2 1 0.
            ::-1      all pages in reverse order.
"""
class PageRange(object):
    """
    A slice-like representation of a range of page indices,
    i.e. page numbers, only starting at zero.
    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.
      o  PageRange(str) parses a string representing a page range.
      o  PageRange(slice) directly "imports" a slice.
      o  to_slice() gives the equivalent slice.
      o  str() and repr() allow printing.
      o  indices(n) is like slice.indices(n).
    """

    def __init__(self, arg):
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
            "int", "[int]:[int]" or "[int]:[int]:[int]",
            where the brackets indicate optional ints.
        {page_range_help}
        Note the difference between this notation and arguments to slice():
            slice(3) means the first three pages;
            PageRange("3") means the range of only the fourth page.
            However PageRange(slice(3)) means the first three pages.

        Raises ParseError if arg is not a slice, a PageRange, or a string
        matching the page range syntax.
        """
        if isinstance(arg, slice):
            # Slices are stored as given, without validation.
            self._slice = arg
            return

        if isinstance(arg, PageRange):
            self._slice = arg.to_slice()
            return

        # Non-strings short-circuit to a falsy value and are rejected below.
        m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
        if not m:
            raise ParseError(arg)
        elif m.group(2):
            # Special case: just an int means a range of one page.
            start = int(m.group(2))
            # slice(-1, 0) would be empty, so "-1" needs an open-ended stop.
            stop = start + 1 if start != -1 else None
            self._slice = slice(start, stop)
        else:
            # Groups 4, 6 and 8 are start, stop and step; each may be
            # missing or empty, which maps to None.
            self._slice = slice(*[int(g) if g else None
                                  for g in m.group(4, 6, 8)])

    # Fill the help text into the docstring, unless docstrings have been
    # stripped (in which case __doc__ is None).
    if __init__.__doc__:
        __init__.__doc__ = __init__.__doc__.format(page_range_help=PAGE_RANGE_HELP)

    @staticmethod
    def valid(input):
        """ True if input is a valid initializer for a PageRange. """
        if isinstance(input, (slice, PageRange)):
            return True
        return bool(isString(input) and re.match(PAGE_RANGE_RE, input))

    def to_slice(self):
        """ Return the slice equivalent of this page range. """
        return self._slice

    def __str__(self):
        """ A string like "1:2:3". """
        s = self._slice
        if s.step is None:
            # A lone "n" is shorthand for n:n+1 -- except "-1", which parses
            # to slice(-1, None).  slice(-1, 0) is an empty range, so it must
            # be written out as "-1:0" rather than abbreviated to "-1".
            if (s.start is not None and s.start != -1
                    and s.stop == s.start + 1):
                return str(s.start)

            indices = s.start, s.stop
        else:
            indices = s.start, s.stop, s.step
        return ':'.join("" if i is None else str(i) for i in indices)

    def __repr__(self):
        """ A string like "PageRange('1:2:3')". """
        return "PageRange(" + repr(str(self)) + ")"

    def indices(self, n):
        """
        n is the length of the list of pages to choose from.
        Returns arguments for range().  See help(slice.indices).
        """
        return self._slice.indices(n)
# Shared PageRange that selects every page (the bare ":" expression).
PAGE_RANGE_ALL = PageRange(":")
def parse_filename_page_ranges(args):
    """
    Given a sequence of filenames and page ranges, return a list of
    (filename, page_range) pairs.

    First arg must be a filename; other args are filenames, page-range
    expressions, slice objects, or PageRange objects.
    A filename not followed by a page range indicates all pages of the file.
    A filename followed by several page ranges yields one pair per range.

    args may be any iterable (list, tuple, generator); it is copied and
    not modified.

    Raises ValueError if a page range appears before any filename.
    """
    pairs = []
    pdf_filename = None
    did_page_range = False
    # The trailing None is a sentinel: it is not a valid page range, so it
    # takes the "new filename" branch and flushes the last file.
    for arg in list(args) + [None]:
        if PageRange.valid(arg):
            if not pdf_filename:
                raise ValueError("The first argument must be a filename, "
                                 "not a page range.")

            pairs.append((pdf_filename, PageRange(arg)))
            did_page_range = True
        else:
            # New filename or end of list--do all of the previous file?
            if pdf_filename and not did_page_range:
                pairs.append((pdf_filename, PAGE_RANGE_ALL))

            pdf_filename = arg
            did_page_range = False
    return pairs