Coverage for /var/devmt/py/utils4_1.6.0/utils4/srccheck.py: 100%

80 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-11-13 09:50 +0000

1#!/usr/bin/env python 

2# -*- coding: utf-8 -*- 

3""" 

4:Purpose: This module is used to perform checksum calculations on a 

5 collection of files to verify if the checksum *calculated* on each 

6 file matches the *expected* checksum value. 

7 

8 In practical terms, an application can call the 

9 :meth:`~SourceCheck.check` method by passing a list of filepaths 

10 to be checksummed, along with a reference file (containing the 

11 expected checksums). If the checksum values match the reference 

12 file, a value of ``True`` is returned to the caller application, 

13 signaling the inspected source code files have *not* been modified 

14 and are 'safe' for use. Otherwise, a value of ``False`` is 

15 returned to the caller and the filenames of each failing file are 

16 printed to the terminal. 

17 

18:Platform: Linux/Windows | Python 3.7+ 

19:Developer: J Berendt 

20:Email: development@s3dev.uk 

21 

22:Comments: n/a 

23 

24:Example usage: 

25 

26 Generate an *un-encrypted* reference file:: 

27 

28 >>> from utils4.srccheck import srccheck 

29 

30 >>> files = ['list.c', 'of.py', 'files.sql'] 

31 >>> srccheck.generate(filepaths=files, encrypt=False) 

32 

33 

34 Verify checksums from within an application, with an *un-encrypted* 

35 reference file:: 

36 

37 >>> from utils4.srccheck import srccheck 

38 

39 >>> srccheck.check(ref_file='path/to/srccheck.ref') 

40 True 

41 

42 Generate an **encrypted** reference file:: 

43 

44 >>> from utils4.srccheck import srccheck 

45 

46 >>> files = ['list.c', 'of.py', 'files.sql'] 

47 >>> srccheck.generate(filepaths=files, encrypt=True) 

48 

49 

50 Verify checksums from within an application, with an *encrypted* reference 

51 file:: 

52 

53 >>> from utils4.srccheck import srccheck 

54 

55 >>> srccheck.check(ref_file='path/to/srccheck.ref', 

56 key_file='path/to/srccheck.key') 

57 True 

58 

59 

60 **Advanced usage:** 

61 

62 If you wish to *delay the output* of mismatched files (to give the caller 

63 application display control), the caller can redirect the output from 

64 the :meth:`~SourceCheck.check` method into a buffer and display at a more 

65 appropriate time. For example:: 

66 

67 >>> from contextlib import redirect_stdout 

68 >>> from io import StringIO 

69 >>> from utils4.srccheck import srccheck 

70 

71 >>> buff = StringIO() 

72 >>> with redirect_stdout(buff): 

73 >>> test = srccheck.check(ref_file='path/to/srccheck.ref') 

74 

75 >>> # ... 

76 

77 >>> if not test: 

78 >>> print(buff.getvalue()) 

79 >>> buff.close() 

80 

81 Checksum verification has failed for the following: 

82 - 02-01_first.c 

83 - 10-09_ptr_exchange.c 

84 - 06-ex07.c 

85 - 15-ex05_col_output.c 

86 - 02-03_multi_lines.c 

87 

88""" 

89# pylint: disable=wrong-import-order 

90 

91import json 

92import os 

93import pickle 

94import sys 

95import uuid 

96from cryptography import fernet 

97from typing import List 

98from utils4.crypto import crypto 

99 

100 

class SourceCheck:
    """Verify source code checksum values are as expected.

    The :meth:`generate` method writes a reference file mapping each file
    path to its checksum (optionally encrypted, with an accompanying key
    file). The :meth:`check` method re-calculates the checksums for the
    files listed in a reference file and compares them to the expected
    values.

    """

    def check(self, ref_file: str, key_file: str='') -> bool:
        """Verify the provided source code file checksums are as expected.

        If any checksums do not match, the names of those files are
        reported to the terminal.

        Args:
            ref_file (str): Full path to the reference file containing the
                full paths to the file(s) to be tested and the associated
                checksum value(s).
            key_file (str, optional): Full path to the key file. If a key file
                is not provided, the method assumes the reference file is in
                plaintext CSV and does not attempt to decrypt.
                Defaults to ''.

        Note:
            If the ``key_file`` argument is *not* provided, it is assumed the
            ``ref_file`` is a plaintext CSV file, and decryption is *not*
            attempted.

            If the ``key_file`` argument *is* provided, it is assumed the
            ``ref_file`` has been encrypted, and decryption is carried out.

        Raises:
            FileNotFoundError: If either the reference file, or key file do
                not exist.
            ValueError: If a non-blank line of a plaintext reference file
                does not contain a comma separator.

        Returns:
            bool: True if all file's checksum values agree with the checksum
            listed in the reference file; otherwise False.

        """
        if not os.path.exists(ref_file):
            raise FileNotFoundError(f'Reference file not found: {ref_file}')
        if key_file and not os.path.exists(key_file):
            raise FileNotFoundError(f'Key file not found: {key_file}')
        if key_file:
            ref = self._read_encrypted_reference(ref_file=ref_file,
                                                 key_file=key_file)
        else:
            ref = self._read_plaintext_reference(ref_file=ref_file)
        chksums = self._checksum(files=ref.keys())
        # Whole-object comparison for quick validation; the per-file
        # comparison is only carried out if something differs.
        if chksums == ref:
            return True
        self._report_mismatches(checksums=chksums, reference=ref)
        return False

    def generate(self, filepaths: List[str], encrypt: bool=False):
        """Generate the reference file containing the source file checksums,
        and the associated key file.

        Both files are written to the user's ``Desktop`` directory, as built
        by :meth:`_build_outpaths`.

        Args:
            filepaths (list[str]): A list of full paths which are to be
                included in the reference file.
            encrypt (bool, optional): Encrypt the reference file and generate
                a key file. Defaults to False.

        :Reference File:

            **If unencrypted:**

            The reference file is a flat, plaintext CSV file with the file
            path as the first field and the checksum value as the second field.

            For example::

                filepath_01,md5_hash_string_01
                filepath_02,md5_hash_string_02
                filepath_03,md5_hash_string_03
                ...
                filepath_NN,md5_hash_string_NN

            **If encrypted:**

            The reference file is a serialised, encrypted representation of
            the full path and associated checksum value for all provided
            files, in JSON format. This data is written to the
            ``srccheck.ref`` file.

            A unique encryption key is created with *each* call to this
            method, and stored to the ``srccheck.key`` file.

            To perform checks, both the reference file *and* the key file must
            be provided to the :meth:`~check` method.

            .. note:: These files are a **pair**. If one file is lost, the
                      other file is useless.

        :Layout:

            **If encrypted:**

            The layout of the *deserialised* and *decrypted* reference file is
            in basic JSON format, with the filename as the keys, and checksum
            values as the values.

            For example::

                {"filepath_01": "md5_hash_string_01",
                 "filepath_02": "md5_hash_string_02",
                 "filepath_03": "md5_hash_string_03",
                 ...,
                 "filepath_NN": "md5_hash_string_NN"}

        Raises:
            FileNotFoundError: If any of the files provided to the
                ``filepaths`` argument do not exist.

        """
        if not self._all_files_exist(files=filepaths):
            raise FileNotFoundError('The files listed above were not found.')
        op_ref, op_key = self._build_outpaths()
        chksums = self._checksum(files=filepaths)
        if encrypt:
            # Use the library's own key generator (32 random bytes, url-safe
            # base64 encoded). A key built from the ASCII hex of a UUID only
            # uses 16 of the 256 possible values per key byte.
            key = fernet.Fernet.generate_key()
            with open(op_key, 'wb') as kfp:
                kfp.write(key)
            f = fernet.Fernet(key=key)
            with open(op_ref, 'wb') as rfp:
                pickle.dump(f.encrypt(json.dumps(chksums).encode()), rfp)
            print('\nComplete.\nThe reference and key files are available on your desktop.')
        else:
            with open(op_ref, 'w', encoding='utf-8') as rfp:
                rfp.writelines(f'{k},{v}\n' for k, v in chksums.items())
            print('\nComplete.\nThe reference file is available on your desktop.')

    @staticmethod
    def _all_files_exist(files: list) -> bool:
        """Verify all provided files exist.

        If any file does not exist, the user is alerted via the terminal and a
        ``FileNotFoundError`` exception is raised by the caller.

        Args:
            files (list): List of files to be tested.

        Returns:
            bool: True, if all files exist, otherwise False.

        """
        nexist = [f for f in files if not os.path.exists(f)]
        if nexist:
            print('\nThe following files do not exist:')
            print(*(f' - {f}' for f in nexist), sep='\n')
            print('')
        return not nexist

    @staticmethod
    def _build_outpaths() -> tuple:
        """Build the output path to the reference and key files.

        Raises:
            NotImplementedError: If the platform is neither Windows (or
                Cygwin) nor Linux.

        Returns:
            tuple: Full path to the reference and key files as::

                ('fname.ref', 'fname.key')

        """
        _os = sys.platform.lower()
        fn_ref = 'srccheck.ref'
        fn_key = 'srccheck.key'
        # A prefix test is required here: a substring test for 'win' also
        # matches 'darwin', which sent macOS down the Windows branch.
        if _os.startswith(('win', 'cygwin')):  # pragma nocover
            home = os.environ.get('USERPROFILE')
        elif _os.startswith('linux'):
            home = os.environ.get('HOME')
        else:  # pragma nocover
            raise NotImplementedError(f'Not a currently supported OS: {_os}')
        # Fall back to the expanded home directory if the environment
        # variable is unset, rather than passing None to os.path.join.
        desk = os.path.join(home or os.path.expanduser('~'), 'Desktop')
        return os.path.join(desk, fn_ref), os.path.join(desk, fn_key)

    @staticmethod
    def _checksum(files: list) -> dict:
        """Calculate checksum for all passed files.

        Args:
            files (list): List of full paths against which a checksum is to be
                calculated.

        Returns:
            dict: A dictionary containing the filename and checksum for all
            passed files, as::

                {'fname_01': 'checksum_hash_01',
                 'fname_02': 'checksum_hash_02',
                 'fname_03': 'checksum_hash_03',
                 ...,
                 'fname_NN': 'checksum_hash_NN'}

        """
        return {f: crypto.checksum_md5(path=f) for f in files}

    @staticmethod
    def _read_encrypted_reference(ref_file: str, key_file: str) -> dict:
        """Deserialise and decrypt an encrypted reference file.

        Args:
            ref_file (str): Full path to the encrypted reference file.
            key_file (str): Full path to the associated key file.

        Returns:
            dict: The decrypted JSON content of the reference file.

        """
        # SECURITY: pickle.load can execute arbitrary code while
        # deserialising. Only use reference files from a trusted source.
        with open(ref_file, 'rb') as rfp:
            data = pickle.load(rfp)
        with open(key_file, 'rb') as kfp:
            f = fernet.Fernet(kfp.read())
        return json.loads(f.decrypt(data).decode())

    @staticmethod
    def _read_plaintext_reference(ref_file: str) -> dict:
        """Read a plaintext (CSV) reference file.

        Blank lines are ignored. Each remaining line is split on its *last*
        comma, so a file path which itself contains a comma is kept intact.

        Args:
            ref_file (str): Full path to the plaintext reference file.

        Raises:
            ValueError: If a non-blank line does not contain a comma.

        Returns:
            dict: A dictionary with the file path as the key and the expected
            checksum as the value.

        """
        ref = {}
        with open(ref_file, 'r', encoding='utf-8') as rfp:
            for line in rfp:
                line = line.strip()
                if not line:
                    continue
                path, sep, chksum = line.rpartition(',')
                if not sep:
                    raise ValueError(f'Malformed reference file line: {line!r}')
                ref[path] = chksum
        return ref

    @staticmethod
    def _report_mismatches(checksums: dict, reference: dict):
        """Report the files for which the checksums do not match.

        Only the base filename of each mismatched file is printed.

        Args:
            checksums (dict): A dictionary containing the recently calculated
                checksums.
            reference (dict): A dictionary containing the *expected* checksums.

        """
        m = [os.path.basename(k)
             for k, v in reference.items() if checksums.get(k) != v]
        print('\nChecksum verification has failed for the following:')
        print(*(f'- {name}' for name in m), sep='\n')
        print('')


# Module-level instance through which callers access the functionality.
srccheck = SourceCheck()