"""Checkm class library docs TODO


[@]SourceFileOrURL   Alg   Digest   Length   ModTime   TargetFileOrURL
TOKEN NUMBER:         1     2        3        4         5        6

"""
11
import codecs
import hashlib
import logging
import os
import sys
from collections import defaultdict
from stat import *

logging.basicConfig(level=logging.INFO)

logger = logging.getLogger('checkm')
26
28 """The directory was not found, or is not accessible."""
30 self.context = (arg, kw)
32 return self.context.__str__()
33
class CheckmReporter(object):
    """Formats scan results as an aligned, whitespace-separated checkm manifest."""

    # Header row; sliced to the first `columns` entries when a manifest is written.
    COLUMN_NAMES = [u'# [@]SourceFileOrURL', u'Alg', u'Digest', u'Length', u'ModTime']

    def __init__(self):
        # NOTE(review): reconstructed — the original __init__ was lost in the
        # mangled listing, but create_checkm_file reads self.scanner, and
        # CheckmScanner is the scanner type defined in this module.
        self.scanner = CheckmScanner()

    def _get_max_len(self, report):
        """Return a mapping of column index -> widest cell length in that column.

        `report` is a list of rows (lists of strings). Unseen indices default
        to 0, which keeps _space_line safe for ragged rows.
        """
        cols = defaultdict(lambda: 0)
        for row in report:
            for index, cell in enumerate(row):
                if len(cell) > cols[index]:
                    cols[index] = len(cell)
        return cols

    def _space_line(self, line, col_maxes):
        """Pad each cell of `line` to its column width plus a 4-space gutter."""
        spaced_line = []
        for index, cell in enumerate(line):
            spaced_line.append(cell)
            spaced_line.append(u" " * (col_maxes[index] - len(cell) + 4))
        return u"".join(spaced_line)

    def create_checkm_file(self, scan_directory, algorithm, checkm_filename,
                           recursive=False, columns=3):
        """Scan `scan_directory` and write an aligned checkm manifest.

        Parameters:
            scan_directory -- directory to scan
            algorithm      -- hashlib algorithm name (e.g. 'md5', 'sha1')
            checkm_filename-- output file path (written as UTF-8)
            recursive      -- walk subdirectories when True
            columns        -- number of manifest columns (3-5)
        """
        # Lazy %-args: formatting is deferred until the record is emitted.
        logger.info("Creating checkm file(%s) for dir(%s) with Alg:%s and columns: %s",
                    checkm_filename, scan_directory, algorithm, columns)
        report = self.scanner.scan_directory(scan_directory, algorithm,
                                             recursive=recursive, columns=columns)
        col_maxes = self._get_max_len(report)
        with codecs.open(checkm_filename, encoding='utf-8', mode="w") as output:
            # Trailing space after the header is preserved from the original format.
            output.write("%s \n" % (self._space_line(CheckmReporter.COLUMN_NAMES[:columns], col_maxes)))
            for line in report:
                output.write("%s\n" % (self._space_line(line, col_maxes)))
            output.write("\n")
66
class CheckmScanner(object):
    """Walks the filesystem, producing one checkm report line per entry.

    A report line is a list: [path, algorithm, digest-or-'d'
    [, size [, mtime]]] depending on `columns` (3-5).
    """

    # Algorithms this tool advertises; each must be a hashlib constructor name.
    HASHTYPES = ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512']

    def scan_local(self, directory_path, algorithm, columns=3):
        """Scan only the immediate children of `directory_path`."""
        report = []
        for item in os.listdir(directory_path):
            item_path = os.path.join(directory_path, item)
            report.append(self.scan_path(item_path, algorithm, columns))
        return report

    def scan_tree(self, directory_path, algorithm, columns):
        """Recursively scan `directory_path`; raise DirectoryNotFound if absent."""
        if not os.path.exists(directory_path):
            # BUG FIX: original raised with an undefined `recursive` name
            # (NameError); this is the recursive scan, so record True.
            raise DirectoryNotFound(directory_path=directory_path, recursive=True)
        report = []
        for dirpath, dirnames, filenames in os.walk(directory_path):
            for name in dirnames + filenames:
                report.append(self.scan_path(os.path.join(dirpath, name),
                                             algorithm, columns))
        return report

    def scan_path(self, item_path, algorithm, columns):
        """Build one report line for a single file or directory.

        Raises:
            DirectoryNotFound -- the path could not be stat'ed/read
            ValueError        -- `algorithm` is not a hashlib constructor
        """
        # BUG FIX: isinstance must be checked first, otherwise a non-int
        # `columns` raises TypeError on the `<` comparison.
        if not isinstance(columns, int) or columns < 3:
            columns = 3
        try:
            line = [str(item_path), str(algorithm)]
            if os.path.isdir(item_path):
                # Directories get a 'd' marker instead of a digest.
                line.append(u'd')
            else:
                hash_gen = getattr(hashlib, algorithm)()
                with open(item_path, 'rb') as fh:
                    logger.info("Checking %s with algorithm %s", item_path, algorithm)
                    # Hash in 8 KiB chunks so large files are not read whole.
                    chunk = fh.read(1024 * 8)
                    while chunk:
                        hash_gen.update(chunk)
                        chunk = fh.read(1024 * 8)
                line.append(str(hash_gen.hexdigest()))
            if columns > 3:
                # Stat once and reuse for both the size and mtime columns.
                stat_info = os.stat(item_path)
                line.append(str(stat_info[ST_SIZE]))
                if columns > 4:
                    line.append(str(stat_info[ST_MTIME]))
            return line
        except OSError:
            # BUG FIX: original referenced undefined `directory_path` and
            # `recursive` here; report the path that actually failed.
            raise DirectoryNotFound(directory_path=item_path)
        except AttributeError:
            raise ValueError("This tool cannot perform hashtype %s" % algorithm)

    def scan_directory(self, directory_path, algorithm, recursive=False, columns=3):
        """Scan a directory, recursively when requested.

        Raises DirectoryNotFound if `directory_path` does not exist.
        """
        if not os.path.exists(directory_path):
            raise DirectoryNotFound(directory_path=directory_path, recursive=recursive)
        if recursive:
            return self.scan_tree(directory_path, algorithm, columns)
        return self.scan_local(directory_path, algorithm, columns)
127