Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python 

2""" 

3Representation and utils for ranges of PDF file pages. 

4 

5Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>. 

6All rights reserved. This software is available under a BSD license; 

7see https://github.com/mstamy2/PyPDF2/blob/master/LICENSE 

8""" 

9 

10import re 

11from .utils import isString 

12 

# Pattern fragment for one decimal integer.  "0" is accepted as-is; any other
# value must start with a non-zero digit, optionally preceded by "-".
_INT_RE = r"(0|-?[1-9]\d*)"  # A decimal int, don't allow "-0".

# Pattern for a complete page range expression: either a lone int, or
# "[int]:[int]" / "[int]:[int]:[int]" where every int is optional.
# Capture groups, counted by opening parenthesis:
#   1  the whole expression
#   2  the lone int (the "just one page" form)
#   3  the whole colon form
#   4  start
#   5  ":stop" plus the optional ":step" tail
#   6  stop
#   7  ":step"
#   8  step
# PageRange.__init__ reads groups 2, 4, 6 and 8.
PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE)

16 

17 

class ParseError(Exception):
    """Raised by PageRange when its argument is not a valid page range."""

20 

21 

# User-facing help text describing the page range syntax.  It is substituted
# into the PageRange.__init__ docstring via the {page_range_help} placeholder.
# NOTE(review): each example line pairs two "expression / meaning" entries;
# the spacing that separated them into columns appears to have been lost in
# this copy of the file -- confirm the intended layout against upstream.
PAGE_RANGE_HELP = """Remember, page indices start with zero.
 Page range expression examples:
 : all pages. -1 last page.
 22 just the 23rd page. :-1 all but the last page.
 0:3 the first three pages. -2 second-to-last page.
 :3 the first three pages. -2: last two pages.
 5: from the sixth page onward. -3:-1 third & second to last.
 The third, "stride" or "step" number is also recognized.
 ::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
 1:10:2 1 3 5 7 9 2::-1 2 1 0.
 ::-1 all pages in reverse order.
"""

34 

35 

class PageRange(object):
    """
    A slice-like representation of a range of page indices,
    i.e. page numbers, only starting at zero.

    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.

      o  PageRange(str) parses a string representing a page range.
      o  PageRange(slice) directly "imports" a slice.
      o  to_slice() gives the equivalent slice.
      o  str() and repr() allow printing.
      o  indices(n) is like slice.indices(n).
    """

    def __init__(self, arg):
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
            "int", "[int]:[int]" or "[int]:[int]:[int]",
        where the brackets indicate optional ints.
        {page_range_help}
        Note the difference between this notation and arguments to slice():
            slice(3) means the first three pages;
            PageRange("3") means the range of only the fourth page.
            However PageRange(slice(3)) means the first three pages.

        Raises ParseError if arg is none of the above.
        """
        # NOTE: this docstring is passed through str.format() below, so it
        # must not contain any braces other than the help placeholder.
        if isinstance(arg, slice):
            self._slice = arg
            return

        if isinstance(arg, PageRange):
            self._slice = arg.to_slice()
            return

        m = isString(arg) and re.match(PAGE_RANGE_RE, arg)
        if not m:
            raise ParseError(arg)
        elif m.group(2):
            # Special case: just an int means a range of one page.
            start = int(m.group(2))
            # "-1" cannot use stop = start + 1: slice(-1, 0) is empty,
            # so the last page is expressed as slice(-1, None).
            stop = start + 1 if start != -1 else None
            self._slice = slice(start, stop)
        else:
            # Groups 4, 6 and 8 are start, stop and step; an empty or
            # unmatched group means "not given".
            self._slice = slice(*[int(g) if g else None
                                  for g in m.group(4, 6, 8)])

    # Fill the help text into the docstring, but only when docstrings exist
    # at all (__doc__ is None when they have been stripped).
    if __init__.__doc__:
        __init__.__doc__ = __init__.__doc__.format(
            page_range_help=PAGE_RANGE_HELP)

    @staticmethod
    def valid(input):
        """ True if input is a valid initializer for a PageRange. """
        return isinstance(input, slice) or \
            isinstance(input, PageRange) or \
            (isString(input)
             and bool(re.match(PAGE_RANGE_RE, input)))

    def to_slice(self):
        """ Return the slice equivalent of this page range. """
        return self._slice

    def __str__(self):
        """
        A string like "1:2:3".

        A range of exactly one page is written as a single int, e.g. "5".
        The result parses back to an equivalent PageRange.
        """
        s = self._slice
        if s.step is None:
            # The single-int shortcut must skip start == -1: "-1" parses to
            # slice(-1, None), the last page, whereas slice(-1, 0) selects
            # nothing, so that slice has to be written out as "-1:0".
            if (s.start is not None and s.start != -1
                    and s.stop == s.start + 1):
                return str(s.start)

            indices = s.start, s.stop
        else:
            indices = s.start, s.stop, s.step
        return ':'.join("" if i is None else str(i) for i in indices)

    def __repr__(self):
        """ A string like "PageRange('1:2:3')". """
        return "PageRange(" + repr(str(self)) + ")"

    def indices(self, n):
        """
        n is the length of the list of pages to choose from.
        Returns arguments for range().  See help(slice.indices).
        """
        return self._slice.indices(n)

121 

122 

# Module-level PageRange built from ":"; parse_filename_page_ranges pairs it
# with any filename that is not followed by an explicit page range.
PAGE_RANGE_ALL = PageRange(":")  # The range of all pages.

124 

125 

def parse_filename_page_ranges(args):
    """
    Given a list of filenames and page ranges, return a list of
    (filename, page_range) pairs.

    args may be a list, a tuple or any other iterable.
    First arg must be a filename; other args are filenames, page-range
    expressions, slice objects, or PageRange objects.
    A filename not followed by a page range indicates all pages of the file.

    Anything accepted by PageRange.valid() is treated as a page range and
    everything else as a filename.

    Raises ValueError if a page range appears before any filename.
    """
    pairs = []
    pdf_filename = None
    did_page_range = False
    # Copy into a list so tuples and other iterables work too; the trailing
    # None is a sentinel that flushes the final filename.
    for arg in list(args) + [None]:
        if PageRange.valid(arg):
            if not pdf_filename:
                raise ValueError("The first argument must be a filename, " \
                                 "not a page range.")

            pairs.append((pdf_filename, PageRange(arg)))
            did_page_range = True
        else:
            # New filename or end of list--do all of the previous file?
            if pdf_filename and not did_page_range:
                pairs.append((pdf_filename, PAGE_RANGE_ALL))

            pdf_filename = arg
            did_page_range = False
    return pairs