Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# Copyright (c) 2006, Mathieu Fenniak 

2# All rights reserved. 

3# 

4# Redistribution and use in source and binary forms, with or without 

5# modification, are permitted provided that the following conditions are 

6# met: 

7# 

8# * Redistributions of source code must retain the above copyright notice, 

9# this list of conditions and the following disclaimer. 

10# * Redistributions in binary form must reproduce the above copyright notice, 

11# this list of conditions and the following disclaimer in the documentation 

12# and/or other materials provided with the distribution. 

13# * The name of the author may not be used to endorse or promote products 

14# derived from this software without specific prior written permission. 

15# 

16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 

17# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 

18# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 

19# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 

20# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 

21# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 

22# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 

23# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 

24# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 

25# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 

26# POSSIBILITY OF SUCH DAMAGE. 

27 

28""" 

29Utility functions for PDF library. 

30""" 

31__author__ = "Mathieu Fenniak" 

32__author_email__ = "biziqe@mathieu.fenniak.net" 

33 

34 

35import sys 

36 

37try: 

38 import __builtin__ as builtins 

39except ImportError: # Py3 

40 import builtins 

41 

42 

# Python 2/3 compatibility aliases.  Each lookup falls back to the Python 3
# builtin when the Python 2 name does not exist on the builtins module.
xrange_fn = getattr(builtins, "xrange", range)
_basestring = getattr(builtins, "basestring", str)

# On Python 2 ``bytes`` is an alias of ``str``; on Python 3 it is its own type.
bytes_type = bytes
string_type = getattr(builtins, "unicode", str)
# ``long`` is only evaluated on Python 2, where the name exists.
int_types = (int,) if sys.version_info[0] >= 3 else (int, long)

49 

50 

# Make basic type tests more consistent
def isString(s):
    """
    Return True when ``s`` is an instance of the module's base string type
    (``basestring`` where the builtins module provides it, otherwise ``str``).
    """
    return isinstance(s, _basestring)

55 

56 

def isInt(n):
    """
    Return True when ``n`` is an instance of one of the types in
    ``int_types`` (``int``, plus ``long`` on Python 2).
    """
    return isinstance(n, int_types)

60 

61 

def isBytes(b):
    """
    Return True when ``b`` is an instance of ``bytes_type``, the
    interpreter's ``bytes`` type.
    """
    return isinstance(b, bytes_type)

65 

66 

# custom implementation of warnings.formatwarning
def formatWarning(message, category, filename, lineno, line=None):
    """
    Format a warning as ``"<Category>: <message> [<file>:<lineno>]\\n"``.

    Only the last component of ``filename`` is shown; both ``/`` and ``\\``
    are treated as path separators.

    :param message: the warning message.
    :param category: the warning class; its ``__name__`` is printed.
    :param str filename: path of the file that triggered the warning.
    :param lineno: line number that triggered the warning.
    :param line: unused; accepted so the signature matches
        ``warnings.formatwarning``.
    :return: the formatted, newline-terminated warning text.
    """
    # Index -1 (rather than 1) also works for a bare file name that
    # contains no separator at all.
    basename = filename.replace("/", "\\").rsplit("\\", 1)[-1]
    return "%s: %s [%s:%s]\n" % (category.__name__, message, basename, lineno)

71 

72 

def readUntilWhitespace(stream, maxchars=None):
    """
    Reads the stream one character at a time and returns the run of
    non-whitespace characters found.

    Reading stops at the first whitespace character (which is consumed but
    not returned), at end of stream, or as soon as ``maxchars`` characters
    have been collected.
    """
    collected = b_("")
    tok = stream.read(1)
    # An empty read means end of stream.
    while tok and not tok.isspace():
        collected += tok
        if len(collected) == maxchars:
            break
        tok = stream.read(1)
    return collected

87 

88 

def readNonWhitespace(stream):
    """
    Consumes characters from the stream until one that is not listed in
    WHITESPACES is read, and returns that character.

    At end of stream the empty read result is returned, since it is not a
    member of WHITESPACES.
    """
    while True:
        tok = stream.read(1)
        if tok not in WHITESPACES:
            return tok

97 

98 

def skipOverWhitespace(stream):
    """
    Similar to readNonWhitespace, but instead of the character it returns a
    Boolean: True if at least one whitespace character was read before the
    first non-whitespace character (or end of stream), False otherwise.

    The first non-whitespace character is consumed as well.
    """
    skipped_any = False
    while stream.read(1) in WHITESPACES:
        skipped_any = True
    return skipped_any

110 

111 

def skipOverComment(stream):
    """
    Skips a ``%`` comment if the stream is positioned at one.

    The next byte is peeked.  If it is ``%``, the stream is advanced past
    the first following end-of-line byte (``\\n`` or ``\\r``), or to the end
    of the stream when the comment is not terminated.  Otherwise the stream
    position is left unchanged.

    :param stream: a seekable byte stream.
    """
    tok = stream.read(1)
    if not tok:
        # End of stream: nothing was consumed, so there is nothing to
        # un-read.
        return
    stream.seek(-1, 1)
    if tok == b'%':
        while tok not in (b'\n', b'\r'):
            tok = stream.read(1)
            if not tok:
                # Unterminated comment at end of stream; an empty read
                # would otherwise repeat forever.
                break

118 

119 

def readUntilRegex(stream, regex, ignore_eof=False):
    """
    Reads until the regular expression pattern matches and returns the data
    that precedes the match.  The match itself is not consumed: the stream
    is left positioned at its first byte.

    Raise PdfStreamError on premature end-of-file.

    :param stream: a seekable byte stream.
    :param regex: a compiled bytes pattern; only its ``search`` method is
        used.
    :param bool ignore_eof: If true, end-of-file is not an error and the
        data read so far is returned instead.
    :return: the bytes read before the match (or before end-of-file).
    """
    # NOTE: each 16-byte read is searched on its own, so a match that
    # straddles two reads is not detected.
    chunks = []
    while True:
        tok = stream.read(16)
        if not tok:
            # stream has truncated prematurely
            if ignore_eof:
                break
            raise PdfStreamError("Stream has ended unexpectedly")
        m = regex.search(tok)
        if m is not None:
            chunks.append(tok[:m.start()])
            # Rewind to the first byte of the match.
            stream.seek(m.start() - len(tok), 1)
            break
        chunks.append(tok)
    return b''.join(chunks)

142 

143 

class ConvertFunctionsToVirtualList(object):
    """
    Read-only sequence whose length and items are supplied by two callables.

    ``lengthFunction()`` gives the number of items and ``getFunction(i)``
    gives item ``i`` for ``0 <= i < length``.
    """

    def __init__(self, lengthFunction, getFunction):
        self.lengthFunction = lengthFunction
        self.getFunction = getFunction

    def __len__(self):
        return self.lengthFunction()

    def __getitem__(self, index):
        if isinstance(index, slice):
            # A slice yields another virtual list that maps its own
            # positions onto the selected positions of this one.
            selected = xrange_fn(*index.indices(len(self)))
            return type(self)(selected.__len__,
                              lambda pos: self[selected[pos]])
        if not isInt(index):
            raise TypeError("sequence indices must be integers")
        size = len(self)
        # support negative indexes
        position = index + size if index < 0 else index
        if not 0 <= position < size:
            raise IndexError("sequence index out of range")
        return self.getFunction(position)

166 

167 

def RC4_encrypt(key, plaintext):
    """
    Encrypts ``plaintext`` with the RC4 stream cipher under ``key``.

    RC4 is symmetric, so applying the function again with the same key
    restores the original data.

    :param key: non-empty byte string (or text string of single-byte
        characters).
    :param plaintext: byte string (or text string of single-byte
        characters) to encrypt.
    :return: the ciphertext as a byte string, same length as ``plaintext``.
    """
    def as_int(item):
        # Iterating bytes yields ints on Python 3, but one-character
        # strings on Python 2 and for text input.
        return item if isinstance(item, int) else ord(item)

    key_length = len(key)

    # Key scheduling: permute the identity state according to the key.
    S = list(range(256))
    j = 0
    for i in range(256):
        j = (j + S[i] + as_int(key[i % key_length])) % 256
        S[i], S[j] = S[j], S[i]

    # Keystream generation, XORed with the input one byte at a time.  A
    # bytearray keeps the output linear-time to build.
    i, j = 0, 0
    output = bytearray()
    for item in plaintext:
        i = (i + 1) % 256
        j = (j + S[i]) % 256
        S[i], S[j] = S[j], S[i]
        t = S[(S[i] + S[j]) % 256]
        output.append(as_int(item) ^ t)
    return bytes(output)

183 

184 

def matrixMultiply(a, b):
    """
    Returns the matrix product of ``a`` and ``b``.

    Both arguments are sequences of rows.  Every entry is converted with
    ``float`` before multiplying, and the result is a list of row lists.
    """
    product = []
    for row in a:
        product_row = []
        # zip(*b) walks the columns of b.
        for col in zip(*b):
            terms = [float(x) * float(y) for x, y in zip(row, col)]
            product_row.append(sum(terms))
        product.append(product_row)
    return product

190 

191 

def markLocation(stream):
    """
    Creates text file showing current location in context.

    Up to RADIUS characters before and after the current stream position
    are written to ``PyPDF2_pdfLocation.txt`` in the working directory,
    with the marker ``HERE`` inserted at the current position.  The stream
    is returned to its original position.

    :param stream: a seekable stream supporting ``tell``, ``seek`` and
        ``read``.
    """
    # Mainly for debugging
    RADIUS = 5000
    here = stream.tell()
    # Clamp so a position near the start does not cause a negative seek.
    start = max(here - RADIUS, 0)
    stream.seek(start, 0)
    before = stream.read(here - start)
    after = stream.read(RADIUS)
    # Restore the exact position, however much was actually read.
    stream.seek(here, 0)

    # Match the file mode to what the stream returned, so byte data can be
    # written on Python 3 as well.
    if isinstance(before, bytes):
        mode, marker = 'wb', b'HERE'
    else:
        mode, marker = 'w', 'HERE'
    with open('PyPDF2_pdfLocation.txt', mode) as outputDoc:
        outputDoc.write(before)
        outputDoc.write(marker)
        outputDoc.write(after)

203 

204 

class PyPdfError(Exception):
    """Base class of the error types defined in this module."""


class PdfReadError(PyPdfError):
    """Error type for read failures; parent class of PdfStreamError."""


class PageSizeNotDefinedError(PyPdfError):
    """Error type named for a missing page size; not raised in this module."""


class PdfReadWarning(UserWarning):
    """Warning category derived from UserWarning; not an exception subtype
    of PyPdfError."""


class PdfStreamError(PdfReadError):
    """Raised by readUntilRegex when the stream ends before a match."""

223 

224 

if sys.version_info[0] < 3:
    def b_(s):
        """Return ``s`` unchanged (Python 2 branch)."""
        return s
else:
    # Encoded results for strings shorter than two characters, keyed by
    # the original string.
    B_CACHE = {}

    def b_(s):
        """
        Return ``s`` as bytes.

        Bytes (including subclasses) are returned unchanged; anything else
        is encoded with latin-1.  Results for strings shorter than two
        characters are cached in B_CACHE.

        :param s: a ``bytes`` or ``str`` value.
        :return: the corresponding ``bytes`` value.
        """
        # Test for bytes first so the str-keyed cache is never probed
        # with a bytes key.
        if isinstance(s, bytes):
            return s
        cached = B_CACHE.get(s)
        if cached is not None:
            return cached
        r = s.encode('latin-1')
        if len(s) < 2:
            B_CACHE[s] = r
        return r

242 

243 

def u_(s):
    """
    Return ``s`` as text.

    On Python 3 the argument is returned unchanged; on Python 2 it is
    decoded to ``unicode`` with the ``unicode_escape`` codec.
    """
    if sys.version_info[0] >= 3:
        return s
    return unicode(s, 'unicode_escape')

249 

250 

def str_(b):
    """
    Return ``b`` as a native string.

    On Python 3 an exact ``bytes`` value is decoded with latin-1; every
    other value, and every value on Python 2, is returned unchanged.
    """
    needs_decode = sys.version_info[0] >= 3 and type(b) == bytes
    return b.decode('latin-1') if needs_decode else b

259 

260 

def ord_(b):
    """
    Return the integer code of ``b``.

    On Python 3 a value that is not an exact ``str`` is returned as-is;
    otherwise ``ord(b)`` is returned.
    """
    if sys.version_info[0] >= 3 and type(b) != str:
        return b
    return ord(b)

266 

267 

def chr_(c):
    """
    Return ``chr(c)`` on Python 3; on Python 2 return ``c`` unchanged.
    """
    return chr(c) if sys.version_info[0] >= 3 else c

273 

274 

def barray(b):
    """
    Return ``bytearray(b)`` on Python 3; on Python 2 return ``b`` unchanged.
    """
    return bytearray(b) if sys.version_info[0] >= 3 else b

280 

281 

def hexencode(b):
    """
    Return the hexadecimal encoding of the byte string ``b``.

    Python 2 uses ``b.encode('hex')``; Python 3 uses the ``hex_codec``
    encoder from the ``codecs`` module.
    """
    if sys.version_info[0] >= 3:
        import codecs
        encode_hex = codecs.getencoder('hex_codec')
        # The encoder returns an (output, length consumed) pair.
        encoded, _consumed = encode_hex(b)
        return encoded
    return b.encode('hex')

289 

290 

291def hexStr(num): 

292 return hex(num).replace('L', '') 

293 

294 

295WHITESPACES = [b_(x) for x in [' ', '\n', '\r', '\t', '\x00']]