1
2
3
4 """Checksumming convenience classes
5
6 TODO! Sorry!
7
8 [@]SourceFileOrURL Alg Digest Length ModTime TargetFileOrURL
9 TOKEN NUMBER: 1 2 3 4 5 6
10
11 """
12
13 from __future__ import with_statement
14
# Maps checkm token position (0-based) to the column name defined in the
# module docstring's line layout.
COLUMNS = dict(enumerate(("SourceFileOrURL",
                          "Alg",
                          "Digest",
                          "Length",
                          "ModTime",
                          "TargetFileOrURL")))
22
23
24 import os, sys
25 from stat import *
26
27 import re
28
29 from collections import defaultdict
30
31 import hashlib
32
33 import codecs
34
35 import logging
36
37 logging.basicConfig(level=logging.INFO)
38
39 logger = logging.getLogger('checkm')
40
42 """The item or directory was either not found, or not accessible."""
44 """
45 FIXME
46 @param *arg:
47 @type *arg:
48 @param **kw:
49 @type **kw:
50 """
51 self.context = (arg, kw)
53 """
54 FIXME
55 """
56 return self.context.__str__()
58 """
59 FIXME
60 """
61 return self.context.__str__()
62
64 COLUMN_NAMES = [u'# [@]SourceFileOrURL',u'Alg',u'Digest',u'Length',u'ModTime']
70
72 """
73 FIXME
74 @param report:
75 @type report:
76 """
77 cols = defaultdict(lambda : 0)
78 for line in report:
79 for index in xrange(len(line)):
80 if len(line[index])>cols[index]:
81 cols[index] = len(line[index])
82 return cols
83
85 """
86 FIXME
87 @param line:
88 @type line:
89 @param col_maxes:
90 @type col_maxes:
91 """
92 spaced_line = []
93 for index in xrange(len(line)):
94 spaced_line.append(line[index])
95 spaces = col_maxes[index]-len(line[index])+4
96 spaced_line.append(u" "*spaces)
97 return u"".join(spaced_line)
98
def create_bagit_manifest(self, scan_directory, algorithm, recursive=False, delimiter=" ", filename=None):
    """Write a bagit-style manifest ("<digest><delimiter><path>" per line).

    @param scan_directory: directory to scan
    @param algorithm: hashlib algorithm name (e.g. 'md5'); also used to
           derive the default manifest filename
    @param recursive: descend into subdirectories when True
    @param delimiter: text placed between the digest and the path
    @param filename: target filename, or an open file-like object to write
           to; defaults to 'manifest-<algorithm>.txt'
    @return: the filename or file-like object that was written to
    """
    if not filename:
        filename = "manifest-%s.txt" % algorithm
    logger.info("Creating bagit manifest file(%s) for dir(%s) with Alg:%s" % (filename,
                                                    scan_directory,
                                                    algorithm))
    report = self.scanner.scan_directory(scan_directory, algorithm, recursive=recursive, columns=3)

    def _write_manifest(output):
        # Report rows are [path, algorithm, digest]; a digest of "d" marks
        # a directory, which bagit manifests do not list.
        for line in report:
            if line[2] != "d":
                output.write("%s%s%s\n" % (line[2], delimiter, line[0]))
        output.write("\n")

    if hasattr(filename, 'write'):
        _write_manifest(filename)
    else:
        with codecs.open(filename, encoding='utf-8', mode="w") as output:
            _write_manifest(output)
    return filename
131
133 logger.info("Creating multilevel checkm files '(%s)' from top level directory(%s) with Alg:%s and columns:%s" % (checkm_filename, top_directory, algorithm, columns))
134 if not os.path.isdir(top_directory):
135 raise NotFound(top_directory=top_directory)
136
137
138
139 dirs = dict([(root, dirnames) for (root, dirnames, _) in os.walk(top_directory, topdown=False)])
140
141 for dirname in dirs:
142 with codecs.open(os.path.join(dirname, checkm_filename), encoding='utf-8', mode="w") as output:
143 self.create_checkm_file(dirname,
144 algorithm,
145 os.path.join(dirname, checkm_filename),
146 recursive=False,
147 columns=columns,
148 checkm_file=output)
149 subdir_report = []
150 for subdir in dirs[dirname]:
151 try:
152 line = self.scanner.scan_path(os.path.join(subdir, checkm_filename), algorithm, columns)
153 line[0] = '@%s' % (line[0])
154 subdir_report.append(line)
155 except Exception, e:
156 print "Fail! %s" % e
157 col_maxes = self._get_max_len(subdir_report)
158 for line in subdir_report:
159 output.write('%s\n' % (self._space_line(line, col_maxes)))
160 output.write('\n')
161
def create_checkm_file(self, scan_directory, algorithm, checkm_filename, recursive=False, columns=3, checkm_file=None):
    """Scan a directory and write a column-aligned checkm manifest for it.

    @param scan_directory: directory to scan
    @param algorithm: hashlib algorithm name used for the Digest column
    @param checkm_filename: name of the manifest file; scan rows describing
           the manifest itself are filtered out so it never lists itself
    @param recursive: descend into subdirectories when True
    @param columns: number of checkm columns to emit per row
    @param checkm_file: optional open file-like object; when supplied it is
           written to and returned (left open), otherwise checkm_filename
           is created as a utf-8 file and closed before returning None
    """
    logger.info("Creating checkm file for dir(%s) with Alg:%s and columns: %s" % (
                                            scan_directory,
                                            algorithm, columns))
    report = self.scanner.scan_directory(scan_directory, algorithm, recursive=recursive, columns=columns)
    # Column widths are computed over the whole report so rows line up.
    col_maxes = self._get_max_len(report)
    if checkm_file != None and hasattr(checkm_file, 'write'):
        checkm_file.write("%s \n" % (self._space_line(CheckmReporter.COLUMN_NAMES[:columns], col_maxes)))
        for line in report:
            logger.info("Checking that the scanned file is not the empty checkm file")
            logger.info("Scanned file: %s vs Checkm filename: %s" % (line[0], checkm_filename))
            # NOTE(review): this branch compares against checkm_filename
            # exactly as given, while the branch below compares against the
            # scan_directory-joined path. Callers of this branch appear to
            # pass an already-joined path (see the multilevel writer) --
            # confirm before unifying the two comparisons.
            if line[0] != checkm_filename:
                checkm_file.write("%s\n" % (self._space_line(line, col_maxes)))
            else:
                logger.info("MATCH! - scan line ignored")
        return checkm_file
    else:
        with codecs.open(checkm_filename, encoding='utf-8', mode="w") as output:
            output.write("%s \n" % (self._space_line(CheckmReporter.COLUMN_NAMES[:columns], col_maxes)))
            for line in report:
                # Skip the manifest file itself if the scan picked it up.
                if line[0] != os.path.join(scan_directory, checkm_filename):
                    output.write("%s\n" % (self._space_line(line, col_maxes)))
            output.write("\n")
185
187 """
188 FIXME
189 @param bagit_filename:
190 @type bagit_filename:
191 @param algorithm=None:
192 @type algorithm=None:
193 """
194 logger.info("Checking files against '%s' bagit manifest" % bagit_filename)
195 if algorithm == None:
196 if hasattr(bagit_filename, 'read'):
197 raise Exception("Need to supply the algorithm when passing a filelike object instead of a filename")
198 m = re.search("manifest-(?P<alg>[^\.]+)\.txt", bagit_filename)
199 if m != None:
200 algorithm = m.groupdict()['alg']
201 parser = BagitParser(bagit_filename)
202 scanner = CheckmScanner()
203 results = {'pass':[], 'fail':{}}
204 for row in parser:
205 if row:
206 try:
207 scan_row = scanner.scan_path(row[1], algorithm, 3)
208 if row[0] != scan_row[2]:
209 logger.info("Failed original: %s" % row)
210 logger.info("Current scan: %s" % scan_row)
211 results['fail'][row[1]] = (row, scan_row)
212 else:
213 results['pass'].append(row[1])
214 except NotFound:
215 scan_row = "File not found"
216 logger.info("Failed original: %s" % row)
217 logger.info("But file not found at this path.")
218 results['fail'][row[1]] = (row, scan_row)
219 return results
220
222 """
223 FIXME
224 @param scan_directory:
225 @type scan_directory:
226 @param checkm_filename:
227 @type checkm_filename:
228 """
229 def _check_files_against_parser(parser):
230 scanner = CheckmScanner()
231 results = {'pass':[], 'fail':{}, 'include':[]}
232 for row in parser:
233 if row:
234 try:
235 if row[0].startswith('@'):
236 row[0] = row[0][1:]
237 results['include'].append(row[0])
238 scan_row = scanner.scan_path(row[0], row[1], len(row))
239 if row != scan_row:
240 logger.info("Failed original: %s" % row)
241 logger.info("Current scan: %s" % scan_row)
242 results['fail'][row[0]] = (row, scan_row)
243 else:
244 results['pass'].append(row[0])
245 except NotFound:
246 scan_row = "File not found"
247 logger.info("Failed original: %s" % row)
248 logger.info("But file not found at this path.")
249 results['fail'][row[0]] = (row, scan_row)
250 return results
251
252 logger.info("Checking files against %s checkm manifest" % checkm_filename)
253 parser = CheckmParser(checkm_filename)
254 results = _check_files_against_parser(parser)
255 if ignore_multilevel:
256 return results
257 else:
258
259 checkm_list = results['include'][:]
260 while checkm_list:
261 checkm_file = checkm_list.pop()
262 parser = CheckmParser(checkm_file)
263 additional_results = _check_files_against_parser(parser)
264
265 results['pass'].extend(additional_results['pass'])
266
267 results['include'].extend(additional_results['include'])
268 checkm_list.extend(additional_results['include'])
269
270 results['fail'].update(additional_results['fail'])
271 return results
272
275 """
276 FIXME
277 @param bagit_file=None:
278 @type bagit_file=None:
279 """
280 self.status = False
281 self.lines = []
282 if bagit_file:
283 self.parse(bagit_file)
284
286 """
287 FIXME
288 """
289 class Bagit_iter:
290 def __init__(self, lines):
291 """
292 FIXME
293 @param lines:
294 @type lines:
295 """
296 self.lines = lines
297 self.last = 0
298 def __iter__(self):
299 """
300 FIXME
301 """
302 return self
303 def next(self):
304 """
305 FIXME
306 """
307 if self.last >= len(self.lines):
308 raise StopIteration
309 elif len(self.lines) == 0:
310 raise StopIteration
311 else:
312 self.last += 1
313 return self.lines[self.last-1]
314 return Bagit_iter(self.lines)
315
def parse(self, fileobj):
    """Parse a bagit manifest from a filename or an open file-like object.

    @param fileobj: path to a manifest file, or any object with a read method
    @return: the parsed rows (self.lines)
    """
    if hasattr(fileobj, "read"):
        self._parse_lines(fileobj)
    else:
        # Treat the argument as a path; decode the file as utf-8.
        with codecs.open(fileobj, encoding='utf-8', mode="r") as manifest_fh:
            self._parse_lines(manifest_fh)
    return self.lines
328
330 """
331 FIXME
332 @param fh:
333 @type fh:
334 """
335 self.lines = []
336 line_buffer = ""
337 def _parse_line(line):
338 """
339 FIXME
340 @param line:
341 @type line:
342 """
343 if not line.startswith('#'):
344 tokens = filter(lambda x: x, re.split("\s+", line, 1))
345 logger.info(tokens)
346 if tokens:
347
348 if tokens[1].startswith("*"):
349 tokens[1] = tokens[1][1:].strip()
350 self.lines.append(tokens)
351 for chunk in fh.read(0x1000):
352 line_buffer = line_buffer + chunk
353 while True:
354 if not line_buffer:
355 break
356 fragments = line_buffer.split('\n',1)
357 if len(fragments) == 1:
358 break
359 _parse_line(fragments[0])
360 line_buffer = fragments[1]
361
364 """
365 FIXME
366 @param checkm_file=None:
367 @type checkm_file=None:
368 """
369 self.status = False
370 self.lines = []
371 if checkm_file:
372 self.parse(checkm_file)
373
375 """
376 FIXME
377 """
378 class Checkm_iter:
379 def __init__(self, lines):
380 """
381 FIXME
382 @param lines:
383 @type lines:
384 """
385 self.lines = lines
386 self.last = 0
387 def __iter__(self):
388 """
389 FIXME
390 """
391 return self
392 def next(self):
393 """
394 FIXME
395 """
396 if self.last >= len(self.lines):
397 raise StopIteration
398 elif len(self.lines) == 0:
399 raise StopIteration
400 else:
401 self.last += 1
402 return self.lines[self.last-1]
403 return Checkm_iter(self.lines)
404
def parse(self, checkm_file):
    """Parse a checkm manifest from a filename or an open file-like object.

    @param checkm_file: path to a manifest, or any object with a readline method
    @return: the parsed rows (self.lines)
    @raise NotFound: when a path is supplied that is not an existing file
    """
    if hasattr(checkm_file, "readline"):
        self._parse_lines(checkm_file)
    else:
        if not os.path.isfile(checkm_file):
            raise NotFound(checkm_file=checkm_file)
        with codecs.open(checkm_file, encoding='utf-8', mode="r") as manifest_fh:
            self._parse_lines(manifest_fh)
    return self.lines
420
422 """
423 FIXME
424 @param fh:
425 @type fh:
426 """
427 self.lines = []
428 line_buffer = ""
429 def _parse_line(line):
430 """
431 FIXME
432 @param line:
433 @type line:
434 """
435 if not line.startswith('#'):
436 tokens = filter(lambda x: x, re.split("\s+", line, 5))
437 logger.info(tokens)
438 if tokens:
439 self.lines.append(tokens)
440
441 for chunk in fh.read(0x1000):
442 line_buffer = line_buffer + chunk
443 while True:
444 if not line_buffer:
445 break
446 fragments = line_buffer.split('\n',1)
447 if len(fragments) == 1:
448 break
449 _parse_line(fragments[0])
450 line_buffer = fragments[1]
451
453 HASHTYPES = ['md5', 'sha1', 'sha224','sha256','sha384','sha512']
def scan_local(self, directory_path, algorithm, columns=3):
    """Scan the immediate contents of a directory (non-recursive).

    @param directory_path: directory whose entries are scanned
    @param algorithm: hashlib algorithm name passed through to scan_path
    @param columns: number of checkm columns per row
    @return: list of checkm rows, one per directory entry
    """
    entries = os.listdir(directory_path)
    return [self.scan_path(os.path.join(directory_path, entry), algorithm, columns)
            for entry in entries]
469
def scan_tree(self, directory_path, algorithm, columns):
    """Recursively scan a directory tree, producing one row per entry.

    @param directory_path: root of the tree to walk
    @param algorithm: hashlib algorithm name passed through to scan_path
    @param columns: number of checkm columns per row
    @return: list of checkm rows for every directory and file in the tree
    @raise NotFound: when directory_path does not exist
    """
    if not os.path.exists(directory_path):
        raise NotFound(directory_path=directory_path, recursive=True)
    report = []
    for (dirpath, dirnames, filenames) in os.walk(directory_path):
        # Directories first, then files, matching os.walk's listing order.
        for name in dirnames + filenames:
            report.append(self.scan_path(os.path.join(dirpath, name), algorithm, columns))
    return report
488
def scan_path(self, item_path, algorithm, columns):
    """Produce a single checkm row for one file or directory.

    @param item_path: path to the file or directory to describe
    @param algorithm: hashlib algorithm name (e.g. 'md5', 'sha1')
    @param columns: 3-5; 4 adds Length, 5 adds ModTime (clamped to >= 3)
    @return: [path, algorithm, digest-or-'d', [length], [mtime]] as text
    @raise NotFound: when the path cannot be read or stat'ed
    @raise ValueError: when hashlib has no such algorithm attribute
    """
    # Text coercion: unicode on Python 2 (original behavior), str on
    # Python 3 where the unicode builtin no longer exists.
    try:
        text = unicode
    except NameError:
        text = str
    if columns < 3 or not isinstance(columns, int):
        columns = 3
    try:
        line = [text(item_path), text(algorithm)]
        if os.path.isdir(item_path):
            # Directories carry no digest; 'd' marks them in the manifest.
            line.append(u'd')
        else:
            hash_gen = getattr(hashlib, algorithm)()
            with open(item_path, 'rb') as fh:
                logger.info("Checking %s with algorithm %s" % (item_path, algorithm))
                # Digest in 8 KiB chunks so large files are not slurped.
                chunk = fh.read(1024*8)
                while chunk:
                    hash_gen.update(chunk)
                    chunk = fh.read(1024*8)
            line.append(text(hash_gen.hexdigest()))
        if columns > 3:
            # Stat once and reuse for both optional columns.
            stat_info = os.stat(item_path)
            line.append(text(stat_info[ST_SIZE]))
            if columns > 4:
                line.append(text(stat_info[ST_MTIME]))
        return line
    except (OSError, IOError):
        raise NotFound(item_path=item_path)
    except AttributeError:
        # getattr(hashlib, algorithm) failed: unknown hash type.
        raise ValueError("This tool cannot perform hashtype %s" % algorithm)
533
def scan_directory(self, directory_path, algorithm, recursive=False, columns=3):
    """Scan a directory, delegating to scan_tree or scan_local.

    @param directory_path: directory to scan
    @param algorithm: hashlib algorithm name passed through to the scanner
    @param recursive: walk the whole tree when True, one level otherwise
    @param columns: number of checkm columns per row
    @return: list of checkm rows
    @raise NotFound: when directory_path does not exist
    """
    if not os.path.exists(directory_path):
        raise NotFound(directory_path=directory_path, recursive=recursive)
    scan = self.scan_tree if recursive else self.scan_local
    return scan(directory_path, algorithm, columns)
552