Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/PyPDF2/filters.py : 12%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# vim: sw=4:expandtab:foldmethod=marker
2#
3# Copyright (c) 2006, Mathieu Fenniak
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met:
9#
10# * Redistributions of source code must retain the above copyright notice,
11# this list of conditions and the following disclaimer.
12# * Redistributions in binary form must reproduce the above copyright notice,
13# this list of conditions and the following disclaimer in the documentation
14# and/or other materials provided with the distribution.
15# * The name of the author may not be used to endorse or promote products
16# derived from this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28# POSSIBILITY OF SUCH DAMAGE.
31"""
32Implementation of stream filters for PDF.
33"""
34__author__ = "Mathieu Fenniak"
35__author_email__ = "biziqe@mathieu.fenniak.net"
37from .utils import PdfReadError, ord_, chr_
38from sys import version_info
39if version_info < ( 3, 0 ):
40 from cStringIO import StringIO
41else:
42 from io import StringIO
43 import struct
try:
    import zlib

    def decompress(data):
        """Return *data* inflated with zlib."""
        return zlib.decompress(data)

    def compress(data):
        """Return *data* deflated with zlib."""
        return zlib.compress(data)

except ImportError:
    # Unable to import zlib.  Attempt to use the System.IO.Compression
    # library from the .NET framework. (IronPython only)
    import System
    from System import IO, Collections, Array

    def _string_to_bytearr(buf):
        """Copy the characters of *buf* into a new .NET Byte array."""
        size = len(buf)
        net_array = Array.CreateInstance(System.Byte, size)
        for pos in range(size):
            net_array[pos] = ord(buf[pos])
        return net_array

    def _bytearr_to_string(net_array):
        """Build a string with one character per element of a .NET Byte array."""
        text = ""
        for pos in range(net_array.Length):
            text += chr(net_array[pos])
        return text

    def _read_bytes(stream):
        """Read *stream* to exhaustion in 2048-byte chunks; return a Byte array."""
        collector = IO.MemoryStream()
        chunk = Array.CreateInstance(System.Byte, 2048)
        while True:
            count = stream.Read(chunk, 0, chunk.Length)
            if count == 0:
                # Read() returning 0 means the stream is exhausted.
                break
            collector.Write(chunk, 0, count)
        result = collector.ToArray()
        collector.Close()
        return result

    def decompress(data):
        """Return *data* inflated through a .NET DeflateStream."""
        packed = _string_to_bytearr(data)
        source = IO.MemoryStream()
        source.Write(packed, 0, packed.Length)
        source.Position = 0  # rewind so the inflater reads from the start
        inflater = IO.Compression.DeflateStream(source, IO.Compression.CompressionMode.Decompress)
        unpacked = _read_bytes(inflater)
        text = _bytearr_to_string(unpacked)
        inflater.Close()
        return text

    def compress(data):
        """Return *data* deflated through a .NET DeflateStream."""
        plain = _string_to_bytearr(data)
        sink = IO.MemoryStream()
        # Third argument True: leave `sink` open when the deflater is closed.
        deflater = IO.Compression.DeflateStream(sink, IO.Compression.CompressionMode.Compress, True)
        deflater.Write(plain, 0, plain.Length)
        deflater.Close()
        sink.Position = 0  # rewind
        packed = sink.ToArray()
        text = _bytearr_to_string(packed)
        sink.Close()
        return text
class FlateDecode(object):
    """Stream filter for deflate-compressed ("/FlateDecode") data."""

    def _paeth(left, up, up_left):
        """Return the PNG Paeth predictor for three neighbouring bytes."""
        estimate = left + up - up_left
        dist_left = abs(estimate - left)
        dist_up = abs(estimate - up)
        dist_up_left = abs(estimate - up_left)
        # Ties are broken in the order: left, up, upper-left.
        if dist_left <= dist_up and dist_left <= dist_up_left:
            return left
        if dist_up <= dist_up_left:
            return up
        return up_left
    _paeth = staticmethod(_paeth)

    def decode(data, decodeParms):
        """Decompress *data* and undo PNG row prediction if requested.

        :param data: the compressed stream contents.
        :param decodeParms: the stream's decode parameters, or a falsy
            value.  Objects without a ``get`` method are treated as
            "no predictor".  The keys read are ``/Predictor`` (default 1),
            ``/Columns`` (default 1), ``/Colors`` (default 1) and
            ``/BitsPerComponent`` (default 8).
        :return: the decompressed data; when a PNG predictor (10-15) is
            applied the result is a string with one character per byte.
        :raises PdfReadError: for a predictor other than 1 or 10-15, for
            data whose length is not a whole number of rows, or for an
            unknown per-row PNG filter type.
        """
        data = decompress(data)
        predictor = 1
        if decodeParms:
            try:
                predictor = decodeParms.get("/Predictor", 1)
            except AttributeError:
                pass  # usually an array with a null object was read

        # predictor 1 == no predictor
        if predictor == 1:
            return data
        if not (10 <= predictor <= 15):
            # unsupported predictor (e.g. 2 == TIFF)
            raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)

        columns = decodeParms.get("/Columns", 1)
        colors = decodeParms.get("/Colors", 1)
        bits = decodeParms.get("/BitsPerComponent", 8)
        # Bytes per complete pixel (at least 1): the distance to the "left"
        # neighbour used by the Sub, Average and Paeth filters.
        bpp = max(1, (colors * bits + 7) // 8)
        # Each row is one filter-type byte followed by the packed samples.
        rowlength = (columns * colors * bits + 7) // 8 + 1
        if len(data) % rowlength != 0:
            raise PdfReadError("Image data is not rectangular")

        decoded_rows = []
        prev_rowdata = (0,) * rowlength
        # PNG prediction can vary from row to row
        for start in range(0, len(data), rowlength):
            rowdata = [ord_(x) for x in data[start:start + rowlength]]
            filterByte = rowdata[0]
            if filterByte == 0:
                # None: row is stored as-is
                pass
            elif filterByte == 1:
                # Sub: add the already-decoded byte one pixel to the left
                for i in range(bpp + 1, rowlength):
                    rowdata[i] = (rowdata[i] + rowdata[i - bpp]) % 256
            elif filterByte == 2:
                # Up: add the byte directly above
                for i in range(1, rowlength):
                    rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
            elif filterByte == 3:
                # Average: add the floor of the mean of left and up
                for i in range(1, rowlength):
                    left = rowdata[i - bpp] if i > bpp else 0
                    rowdata[i] = (rowdata[i] + (left + prev_rowdata[i]) // 2) % 256
            elif filterByte == 4:
                # Paeth: add whichever of left/up/upper-left is the best guess
                for i in range(1, rowlength):
                    left = rowdata[i - bpp] if i > bpp else 0
                    up_left = prev_rowdata[i - bpp] if i > bpp else 0
                    guess = FlateDecode._paeth(left, prev_rowdata[i], up_left)
                    rowdata[i] = (rowdata[i] + guess) % 256
            else:
                # unsupported PNG filter
                raise PdfReadError("Unsupported PNG filter %r" % filterByte)
            prev_rowdata = rowdata
            # Drop the filter-type byte; keep only the sample bytes.
            decoded_rows.append(''.join([chr(x) for x in rowdata[1:]]))
        return ''.join(decoded_rows)
    decode = staticmethod(decode)

    def encode(data):
        """Return *data* deflate-compressed (no predictor is applied)."""
        return compress(data)
    encode = staticmethod(encode)
class ASCIIHexDecode(object):
    """Stream filter for hexadecimal-encoded ("/ASCIIHexDecode") data."""

    def decode(data, decodeParms=None):
        """Decode pairs of hexadecimal digits into characters.

        Whitespace is ignored and decoding stops at the ``>`` end-of-data
        marker, or at the end of *data* if the marker is missing.  A
        trailing unpaired digit is treated as if it were followed by ``0``.

        :param data: the encoded text; Python 3 ``bytes`` are accepted and
            interpreted as Latin-1.
        :param decodeParms: unused; accepted for a uniform filter signature.
        :return: a string with one character per decoded byte.
        :raises ValueError: if a non-hexadecimal character is encountered.
        """
        if isinstance(data, bytes) and not isinstance(data, str):
            # Python 3 bytes: iterate characters, not integers.
            data = data.decode("latin-1")

        decoded = []
        pair = ""
        for c in data:
            if c == ">":
                break
            if c.isspace():
                continue
            pair += c
            if len(pair) == 2:
                decoded.append(chr(int(pair, base=16)))
                pair = ""
        if pair:
            # Odd number of digits: behave as if a 0 followed the last one.
            decoded.append(chr(int(pair + "0", base=16)))
        return "".join(decoded)
    decode = staticmethod(decode)
class LZWDecode(object):
    """Stream filter for LZW-compressed ("/LZWDecode") data.

    Taken from:
    http://www.java2s.com/Open-Source/Java-Document/PDF/PDF-Renderer/com/sun/pdfview/decode/LZWDecode.java.htm
    """

    class decoder(object):
        """Stateful bit reader and dictionary for one LZW stream."""

        def __init__(self, data):
            """Prepare to decode *data* (a string, or Python 3 bytes)."""
            self.STOP = 257
            self.CLEARDICT = 256
            self.data = data
            self.bytepos = 0
            self.bitpos = 0
            # Codes 0-255 map to single characters; 256 and 257 are the
            # clear and stop codes; later slots are filled while decoding.
            self.dict = [""] * 4096
            for i in range(256):
                self.dict[i] = chr(i)
            self.resetDict()

        def resetDict(self):
            """Forget all learned entries and return to 9-bit codes."""
            self.dictlen = 258
            self.bitspercode = 9

        def nextCode(self):
            """Return the next code, most significant bit first.

            Returns -1 if the data runs out before a full code is read.
            """
            fillbits = self.bitspercode
            value = 0
            while fillbits > 0:
                if self.bytepos >= len(self.data):
                    return -1
                nextbits = self.data[self.bytepos]
                if not isinstance(nextbits, int):
                    # Indexing a string yields a character; indexing
                    # Python 3 bytes already yields an int.
                    nextbits = ord(nextbits)
                bitsfromhere = 8 - self.bitpos
                if bitsfromhere > fillbits:
                    bitsfromhere = fillbits
                value |= (((nextbits >> (8 - self.bitpos - bitsfromhere)) &
                           (0xff >> (8 - bitsfromhere))) <<
                          (fillbits - bitsfromhere))
                fillbits -= bitsfromhere
                self.bitpos += bitsfromhere
                if self.bitpos >= 8:
                    self.bitpos = 0
                    self.bytepos = self.bytepos + 1
            return value

        def decode(self):
            """Decode the whole stream and return it as a string.

            Algorithm derived from:
            http://www.rasip.fer.hr/research/compress/algorithms/fund/lz/lzw.html
            and the PDFReference.

            :raises PdfReadError: if the data ends before the stop code.
            """
            cW = self.CLEARDICT
            pieces = []  # joined once at the end instead of repeated +=
            while True:
                pW = cW
                cW = self.nextCode()
                if cW == -1:
                    raise PdfReadError("Missed the stop code in LZWDecode!")
                if cW == self.STOP:
                    break
                elif cW == self.CLEARDICT:
                    self.resetDict()
                elif pW == self.CLEARDICT:
                    # First code after a clear: emitted, nothing learned yet.
                    pieces.append(self.dict[cW])
                else:
                    if cW < self.dictlen:
                        pieces.append(self.dict[cW])
                        p = self.dict[pW] + self.dict[cW][0]
                        self.dict[self.dictlen] = p
                        self.dictlen += 1
                    else:
                        # Code not in the table yet: it must be the previous
                        # string plus that string's own first character.
                        p = self.dict[pW] + self.dict[pW][0]
                        pieces.append(p)
                        self.dict[self.dictlen] = p
                        self.dictlen += 1
                    # Widen the code one entry before the table fills
                    # ("early change"), up to 12 bits.
                    if (self.dictlen >= (1 << self.bitspercode) - 1 and
                            self.bitspercode < 12):
                        self.bitspercode += 1
            return "".join(pieces)

    @staticmethod
    def decode(data, decodeParams=None):
        """Return *data* LZW-decoded as a string.

        *decodeParams* is accepted for a uniform filter signature but is
        not consulted.
        """
        return LZWDecode.decoder(data).decode()
class ASCII85Decode(object):
    """Stream filter for base-85 ("/ASCII85Decode") data."""

    def decode(data, decodeParms=None):
        """Decode ASCII base-85 text.

        An optional leading ``<~`` marker is skipped, ``z`` expands to four
        zero bytes, and decoding stops at the ``~`` of the ``~>`` end
        marker.  A final short group is padded and the padding bytes are
        dropped from the output.

        :param data: the encoded text (on Python 3 a ``str`` is first
            encoded as ASCII).
        :param decodeParms: unused; accepted for a uniform filter signature.
        :return: a byte string on Python 2, ``bytes`` on Python 3.
        """
        if version_info < ( 3, 0 ):
            retval = ""
            group = []
            x = 0
            hitEod = False
            # remove all whitespace from data
            data = [y for y in data if not (y in ' \n\r\t')]
            while not hitEod:
                c = data[x]
                if len(retval) == 0 and c == "<" and data[x+1] == "~":
                    # optional start-of-data marker
                    x += 2
                    continue
                elif c == 'z':
                    assert len(group) == 0
                    retval += '\x00\x00\x00\x00'
                    x += 1
                    continue
                elif c == "~" and data[x+1] == ">":
                    if len(group) != 0:
                        # cannot have a final group of just 1 char
                        assert len(group) > 1
                        cnt = len(group) - 1
                        group += [ 85, 85, 85 ]
                        # Remember how many real bytes the short group
                        # holds; this also ends the loop.
                        hitEod = cnt
                    else:
                        break
                else:
                    c = ord(c) - 33
                    assert c >= 0 and c < 85
                    group += [ c ]
                if len(group) >= 5:
                    b = group[0] * (85**4) + \
                        group[1] * (85**3) + \
                        group[2] * (85**2) + \
                        group[3] * 85 + \
                        group[4]
                    # 0xFFFFFFFF itself is a valid 32-bit group value.
                    assert b <= (2**32 - 1)
                    c4 = chr((b >> 0) % 256)
                    c3 = chr((b >> 8) % 256)
                    c2 = chr((b >> 16) % 256)
                    c1 = chr(b >> 24)
                    retval += (c1 + c2 + c3 + c4)
                    if hitEod:
                        # Drop the bytes that came from padding.
                        retval = retval[:-4+hitEod]
                    group = []
                x += 1
            return retval
        else:
            if isinstance(data, str):
                data = data.encode('ascii')
            # Skip the optional "<~" start marker.  '<' lies inside the
            # digit range, so it would otherwise be decoded as data and
            # the following '~' would end decoding immediately.
            data = data.lstrip()
            if data.startswith(b'<~'):
                data = data[2:]
            n = b = 0
            out = bytearray()
            for c in data:
                if ord('!') <= c and c <= ord('u'):
                    n += 1
                    b = b*85+(c-33)
                    if n == 5:
                        out += struct.pack(b'>L',b)
                        n = b = 0
                elif c == ord('z'):
                    assert n == 0
                    out += b'\0\0\0\0'
                elif c == ord('~'):
                    if n:
                        # Pad the short final group with the highest digit,
                        # then keep only the n-1 bytes it really encodes.
                        for _ in range(5-n):
                            b = b*85+84
                        out += struct.pack(b'>L',b)[:n-1]
                    break
                # any other character (e.g. whitespace) is ignored
            return bytes(out)
    decode = staticmethod(decode)
def decodeStreamData(stream):
    """Return the data of *stream* after applying each of its filters in order.

    :param stream: an object providing ``get(key, default)`` for the
        ``/Filter`` and ``/DecodeParms`` entries and the raw data in
        ``_data``.
    :return: the decoded data, or ``stream._data`` unchanged when it is
        empty or no filter is listed.
    :raises NotImplementedError: for an unknown filter name, or for a
        ``/Crypt`` filter whose decode parameters contain ``/Name`` or
        ``/Type``.
    """
    from .generic import NameObject
    filters = stream.get("/Filter", ())
    if len(filters) and not isinstance(filters[0], NameObject):
        # we have a single filter instance
        filters = (filters,)
    data = stream._data
    # If there is not data to decode we should not try to decode the data.
    if data:
        for filterType in filters:
            if filterType == "/FlateDecode" or filterType == "/Fl":
                data = FlateDecode.decode(data, stream.get("/DecodeParms"))
            elif filterType == "/ASCIIHexDecode" or filterType == "/AHx":
                data = ASCIIHexDecode.decode(data)
            elif filterType == "/LZWDecode" or filterType == "/LZW":
                data = LZWDecode.decode(data, stream.get("/DecodeParms"))
            elif filterType == "/ASCII85Decode" or filterType == "/A85":
                data = ASCII85Decode.decode(data)
            elif filterType == "/Crypt":
                # Same key as the other filters: "/DecodeParms".  The former
                # spelling "/DecodeParams" never matched, so this check
                # could not trigger.
                decodeParams = stream.get("/DecodeParms", {})
                try:
                    customCrypt = ("/Name" in decodeParams or
                                   "/Type" in decodeParams)
                except TypeError:
                    # Entry is not a container: treat as "no parameters".
                    customCrypt = False
                if customCrypt:
                    raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
                # Otherwise the data passes through unchanged.
            else:
                # unsupported filter
                raise NotImplementedError("unsupported filter %s" % filterType)
    return data