Coverage for e2xgrader/exchange/hash_utils.py: 98%

43 statements  

« prev     ^ index     » next       coverage.py v7.4.2, created at 2024-03-14 13:22 +0100

1import fnmatch 

2import hashlib 

3import os 

4from typing import List, Tuple 

5 

6 

def compute_hashcode_of_file(filename, method="sha1") -> str:
    """
    Return the hexadecimal digest of a file's contents.

    The file is read in binary mode in 4096-byte blocks, so it is never
    loaded into memory as a whole.

    Args:
        filename (str): Path of the file to hash.
        method (str, optional): Hash algorithm, either "md5" or "sha1".
            Defaults to "sha1".

    Returns:
        str: Hex digest of the file contents.

    Raises:
        ValueError: If ``method`` is neither "md5" nor "sha1". This is
            checked before the file is opened.
    """
    # Guard clause: reject unknown algorithms before touching the file.
    if method != "md5" and method != "sha1":
        raise ValueError("Currently only the methods md5 and sha1 are supported!")

    digest = hashlib.md5() if method == "md5" else hashlib.sha1()

    with open(filename, "rb") as stream:
        while True:
            block = stream.read(4096)
            if not block:
                # An empty read signals end of file.
                break
            digest.update(block)

    return digest.hexdigest()

33 

34 

35def hash_files_in_directory( 

36 directory, 

37 method="sha1", 

38 exclude_files=None, 

39 exclude_subfolders=None, 

40) -> List[Tuple[str, str]]: 

41 """ 

42 Hashes all files in a directory using the specified method. 

43 

44 Args: 

45 directory (str): The directory path to hash files in. 

46 method (str, optional): The hashing method to use. Defaults to "sha1". 

47 exclude_files (List[str], optional): List of file names or patterns to exclude from hashing. 

48 Defaults to None. 

49 exclude_subfolders (List[str], optional): List of subfolder names to exclude from hashing. 

50 Defaults to None. 

51 

52 Returns: 

53 List[Tuple[str, str]]: A list of tuples containing the relative file paths and their 

54 corresponding hash codes. 

55 

56 Examples: 

57 >>> hash_files_in_directory("path/to/directory") 

58 [ 

59 ("file1.txt", "hashcode1"), 

60 ("file2.txt", "hashcode2"), 

61 ("notebook.ipynb", "hashcode3"), 

62 (".ipynb_checkpoints/notebook-checkpoint.ipynb", "hashcode4"), 

63 ... 

64 ] 

65 >>> hash_files_in_directory( 

66 "path/to/directory", 

67 exclude_files=["*.txt"], 

68 exclude_subfolders=[".ipynb_checkpoints"] 

69 ) 

70 [ 

71 ("notebook.ipynb", "hashcode3"), 

72 ... 

73 ] 

74 """ 

75 if exclude_files is None: 

76 exclude_files = [] 

77 if exclude_subfolders is None: 

78 exclude_subfolders = [] 

79 else: 

80 exclude_subfolders = [ 

81 os.path.normpath(subfolder) for subfolder in exclude_subfolders 

82 ] 

83 

84 hashes = dict() 

85 

86 for root, dirs, files in os.walk(directory): 

87 for file in files: 

88 if any(fnmatch.fnmatch(file, pattern) for pattern in exclude_files): 

89 continue 

90 if any([subfolder in root for subfolder in exclude_subfolders]): 

91 continue 

92 filename = os.path.join(root, file) 

93 hashes[os.path.relpath(filename, start=directory)] = ( 

94 compute_hashcode_of_file(filename, method) 

95 ) 

96 

97 return sorted(hashes.items()) 

98 

99 

def generate_directory_hash_file(
    directory,
    method="sha1",
    exclude_files=None,
    exclude_subfolders=None,
    output_file="hashes.txt",
):
    """
    Creates a file containing the hash codes of all files in a directory.

    The output file is written inside ``directory`` and contains one line per
    hashed file in the form ``"<hashcode> <relative path>"``. Lines are joined
    with newlines and there is no trailing newline. ``output_file`` is always
    added to the exclusion patterns so that the hash file is not hashed itself.

    Args:
        directory (str): The directory path to hash files in.
        method (str, optional): The hashing method to use. Defaults to "sha1".
        exclude_files (List[str], optional): List of file names or patterns to exclude
            from hashing. The passed list is not modified. Defaults to None.
        exclude_subfolders (List[str], optional): List of subfolder names to exclude
            from hashing. Defaults to None.
        output_file (str, optional): The name of the file to write the hash codes to.
            Defaults to "hashes.txt".
    """
    # Build a fresh list instead of appending in place: appending would leak
    # ``output_file`` into the caller's list and pile up on repeated calls.
    if exclude_files is None:
        exclude_files = [output_file]
    else:
        exclude_files = list(exclude_files) + [output_file]

    hashes = hash_files_in_directory(
        directory, method, exclude_files, exclude_subfolders
    )
    formatted_hashes = "\n".join(
        f"{hashcode} {filename}" for filename, hashcode in hashes
    )
    with open(os.path.join(directory, output_file), "w") as f:
        f.write(formatted_hashes)

133 

134 

def truncate_hashcode(hashcode: str, number_of_chunks: int = 3, chunk_size: int = 4) -> str:
    """
    Truncate a hash code into a more readable format.

    The first ``number_of_chunks * chunk_size`` characters of ``hashcode`` are
    split into chunks of ``chunk_size`` characters joined by dashes, e.g.
    ``"0123456789abcdef"`` becomes ``"0123-4567-89ab"`` with the defaults.

    If ``hashcode`` is shorter than ``number_of_chunks * chunk_size``, the
    trailing chunks are shorter or empty, but the separators are still
    emitted (``"abcdef"`` becomes ``"abcd-ef-"``).

    Args:
        hashcode (str): The hash code to truncate.
        number_of_chunks (int, optional): The number of chunks to split the hash code
            into. Defaults to 3.
        chunk_size (int, optional): The size of each chunk. Defaults to 4.

    Returns:
        str: The truncated hash code.
    """
    offsets = range(0, number_of_chunks * chunk_size, chunk_size)
    return "-".join(hashcode[start : start + chunk_size] for start in offsets)