dupechecker.dupechecker
import argparse
import filecmp
import time
from concurrent.futures import ThreadPoolExecutor
from itertools import combinations

from griddle import griddy
from pathier import Pathier
from printbuddies import Spinner
from younotyou import younotyou


def get_duplicates(paths: list[Pathier]) -> list[list[Pathier]]:
    """Return a list of lists for duplicate files in `paths`.

    Each inner list holds two or more paths whose contents compare equal
    under `filecmp.cmp(..., shallow=False)`; files without a duplicate are
    left out. Within a set, the file that sits latest in `paths` comes first,
    followed by its duplicates in their original relative order.

    `paths` is not modified; the search runs on a copy.
    """
    remaining = list(paths)
    matching_sets = []
    while remaining:
        comparee = remaining.pop()
        matching_files = []
        unmatched = []
        for file in remaining:
            if filecmp.cmp(comparee, file, False):
                matching_files.append(file)
            else:
                unmatched.append(file)
        if matching_files:
            matching_sets.append([comparee, *matching_files])
            # Files already assigned to a set never need comparing again.
            remaining = unmatched
    return matching_sets


def get_args() -> argparse.Namespace:
    """Parse the command line and gather the files to compare.

    After parsing, `args.paths` no longer holds the directories given on the
    command line: it is replaced by the list of files found in them (globbed
    with `*.*`, recursively when `-r` is set) minus anything matching the
    `-i/--ignores` patterns.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help=""" Glob files to compare recursively. """,
    )

    parser.add_argument(
        "-i",
        "--ignores",
        type=str,
        nargs="*",
        default=[],
        help=""" Ignore files matching these patterns.
        e.g. `dupechecker -i *.wav` will compare all files in the current working directory except .wav files.""",
    )

    parser.add_argument(
        "-d",
        "--delete_dupes",
        action="store_true",
        help=""" After finding duplicates, delete all but one copy.
        For each set of duplicates, the tool will ask you to enter the number corresponding to the copy you want to keep.
        Pressing 'enter' without entering a number will skip that set without deleting anything.""",
    )

    parser.add_argument(
        "-ad",
        "--autodelete",
        action="store_true",
        help=""" Automatically decide which file to keep and which to delete from each set of duplicate files instead of asking which to keep. """,
    )

    parser.add_argument(
        "-ns",
        "--no_show",
        action="store_true",
        help=""" Don't show printout of matching files. """,
    )

    parser.add_argument(
        "paths",
        type=str,
        default=[Pathier.cwd()],
        nargs="*",
        help=""" The paths to compare files in. """,
    )

    args = parser.parse_args()
    roots = [Pathier(path) for path in args.paths]
    print("Gathering files...")
    files = []
    for root in roots:
        found = root.rglob("*.*") if args.recursive else root.glob("*.*")
        # "*.*" also matches directories with a dot in their name.
        files.extend(file for file in found if file.is_file())
    kept = younotyou([str(file) for file in files], exclude_patterns=args.ignores)
    # The filter is fed strings; convert back so later code can call
    # path methods such as `.delete()` on the entries.
    args.paths = [Pathier(file) for file in kept]
    num_files = len(args.paths)
    # n choose 2, computed directly instead of materialising every pair.
    num_comparisons = num_files * (num_files - 1) // 2
    print(f"Making {num_comparisons} comparisons between {num_files} files...")

    return args


def delete_wizard(matches: list[list[Pathier]]):
    """Ask which file to keep for each set and delete the others.

    For every set the files are listed with 1-based numbers. An empty answer
    skips the set without deleting anything. An answer that is not one of the
    listed numbers is rejected and the question is asked again, so a typo can
    never delete a whole set.
    """
    print()
    print("Enter the corresponding number of the file to keep.")
    print(
        "Press 'Enter' without giving a number to skip deleting any files for the given set."
    )
    print()
    for match in matches:
        map_ = {str(i): file for i, file in enumerate(match, 1)}
        options = "\n".join(f"({i}) {file}" for i, file in map_.items()) + "\n"
        print(options)
        prompt = f"Enter number of file to keep ({', '.join(map_)}): "
        keeper = input(prompt).strip()
        while keeper and keeper not in map_:
            print(f"'{keeper}' is not one of the listed numbers.")
            keeper = input(prompt).strip()
        if not keeper:
            continue
        for num, file in map_.items():
            if num != keeper:
                file.delete()


def autodelete(matches: list[list[Pathier]]):
    """Keep one of each set in `matches` and delete the others.

    The last file of every set is the one kept. Neither `matches` nor its
    inner lists are modified, and an empty set is simply skipped.
    """
    for match in matches:
        for file in match[:-1]:
            file.delete()


def _existing_size(files: list[Pathier]) -> int:
    """Return the combined size in bytes of the entries in `files` that still exist."""
    total = 0
    for file in files:
        try:
            total += Pathier(file).stat().st_size
        except FileNotFoundError:
            # Already deleted: contributes nothing to the remaining size.
            continue
    return total


def dupechecker(args: argparse.Namespace | None = None):
    """Find duplicate files, report them and optionally delete the extras.

    `args` is the namespace produced by `get_args()`; when omitted it is
    obtained by calling `get_args()`. The search runs in a worker thread
    while a spinner is shown. If `delete_dupes` or `autodelete` is set, the
    corresponding deletion routine runs and the amount of space freed is
    printed.
    """
    print()
    if args is None:
        args = get_args()
    s = [
        ch.rjust(i + j)
        for i in range(1, 25, 3)
        for j, ch in enumerate(["/", "-", "\\"])
    ]
    s += s[::-1]
    with Spinner(s) as spinner:
        with ThreadPoolExecutor() as exc:
            thread = exc.submit(get_duplicates, args.paths)
            while not thread.done():
                spinner.display()
                time.sleep(0.025)
            matches = thread.result()
    if not matches:
        print("No duplicates detected.")
        return
    print(f"Found {len(matches)} duplicate sets of files.")
    if not args.no_show:
        print(griddy(matches))
    if args.delete_dupes or args.autodelete:
        # Snapshot the candidates first so the measurement does not depend on
        # whether the deletion routine alters `matches`.
        candidates = [file for match in matches for file in match]
        size = _existing_size(candidates)
        if args.delete_dupes:
            delete_wizard(matches)
        else:
            autodelete(matches)
        deleted_size = size - _existing_size(candidates)
        print(f"Deleted {Pathier.format_size(deleted_size)}.")


if __name__ == "__main__":
    dupechecker(get_args())
def get_duplicates(paths: list[pathier.pathier.Pathier]) -> list[list[pathier.pathier.Pathier]]:
def get_duplicates(paths: list[Pathier]) -> list[list[Pathier]]:
    """Return a list of lists for duplicate files in `paths`.

    Each inner list holds two or more paths whose contents compare equal
    under `filecmp.cmp(..., shallow=False)`; files without a duplicate are
    left out. Within a set, the file that sits latest in `paths` comes first,
    followed by its duplicates in their original relative order.

    `paths` is not modified; the search runs on a copy.
    """
    remaining = list(paths)
    matching_sets = []
    while remaining:
        comparee = remaining.pop()
        matching_files = []
        unmatched = []
        for file in remaining:
            if filecmp.cmp(comparee, file, False):
                matching_files.append(file)
            else:
                unmatched.append(file)
        if matching_files:
            matching_sets.append([comparee, *matching_files])
            # Files already assigned to a set never need comparing again.
            remaining = unmatched
    return matching_sets
Return a list of lists for duplicate files in `paths`.
def get_args() -> argparse.Namespace:
def get_args() -> argparse.Namespace:
    """Parse the command line and gather the files to compare.

    After parsing, `args.paths` no longer holds the directories given on the
    command line: it is replaced by the list of files found in them (globbed
    with `*.*`, recursively when `-r` is set) minus anything matching the
    `-i/--ignores` patterns.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help=""" Glob files to compare recursively. """,
    )

    parser.add_argument(
        "-i",
        "--ignores",
        type=str,
        nargs="*",
        default=[],
        help=""" Ignore files matching these patterns.
        e.g. `dupechecker -i *.wav` will compare all files in the current working directory except .wav files.""",
    )

    parser.add_argument(
        "-d",
        "--delete_dupes",
        action="store_true",
        help=""" After finding duplicates, delete all but one copy.
        For each set of duplicates, the tool will ask you to enter the number corresponding to the copy you want to keep.
        Pressing 'enter' without entering a number will skip that set without deleting anything.""",
    )

    parser.add_argument(
        "-ad",
        "--autodelete",
        action="store_true",
        help=""" Automatically decide which file to keep and which to delete from each set of duplicate files instead of asking which to keep. """,
    )

    parser.add_argument(
        "-ns",
        "--no_show",
        action="store_true",
        help=""" Don't show printout of matching files. """,
    )

    parser.add_argument(
        "paths",
        type=str,
        default=[Pathier.cwd()],
        nargs="*",
        help=""" The paths to compare files in. """,
    )

    args = parser.parse_args()
    roots = [Pathier(path) for path in args.paths]
    print("Gathering files...")
    files = []
    for root in roots:
        found = root.rglob("*.*") if args.recursive else root.glob("*.*")
        # "*.*" also matches directories with a dot in their name.
        files.extend(file for file in found if file.is_file())
    kept = younotyou([str(file) for file in files], exclude_patterns=args.ignores)
    # The filter is fed strings; convert back so later code can call
    # path methods such as `.delete()` on the entries.
    args.paths = [Pathier(file) for file in kept]
    num_files = len(args.paths)
    # n choose 2, computed directly instead of materialising every pair.
    num_comparisons = num_files * (num_files - 1) // 2
    print(f"Making {num_comparisons} comparisons between {num_files} files...")

    return args
def delete_wizard(matches: list[list[pathier.pathier.Pathier]]):
def delete_wizard(matches: list[list[Pathier]]):
    """Ask which file to keep for each set and delete the others.

    For every set the files are listed with 1-based numbers. An empty answer
    skips the set without deleting anything. An answer that is not one of the
    listed numbers is rejected and the question is asked again, so a typo can
    never delete a whole set.
    """
    print()
    print("Enter the corresponding number of the file to keep.")
    print(
        "Press 'Enter' without giving a number to skip deleting any files for the given set."
    )
    print()
    for match in matches:
        map_ = {str(i): file for i, file in enumerate(match, 1)}
        options = "\n".join(f"({i}) {file}" for i, file in map_.items()) + "\n"
        print(options)
        prompt = f"Enter number of file to keep ({', '.join(map_)}): "
        keeper = input(prompt).strip()
        while keeper and keeper not in map_:
            print(f"'{keeper}' is not one of the listed numbers.")
            keeper = input(prompt).strip()
        if not keeper:
            continue
        for num, file in map_.items():
            if num != keeper:
                file.delete()
Ask which file to keep for each set.
def autodelete(matches: list[list[pathier.pathier.Pathier]]):
def autodelete(matches: list[list[Pathier]]):
    """Keep one of each set in `matches` and delete the others.

    The last file of every set is the one kept. Neither `matches` nor its
    inner lists are modified, and an empty set is simply skipped.
    """
    for match in matches:
        for file in match[:-1]:
            file.delete()
Keep one of each set in `matches` and delete the others.
def dupechecker(args: argparse.Namespace | None = None):
def _existing_size(files: list[Pathier]) -> int:
    """Return the combined size in bytes of the entries in `files` that still exist."""
    total = 0
    for file in files:
        try:
            total += Pathier(file).stat().st_size
        except FileNotFoundError:
            # Already deleted: contributes nothing to the remaining size.
            continue
    return total


def dupechecker(args: argparse.Namespace | None = None):
    """Find duplicate files, report them and optionally delete the extras.

    `args` is the namespace produced by `get_args()`; when omitted it is
    obtained by calling `get_args()`. The search runs in a worker thread
    while a spinner is shown. If `delete_dupes` or `autodelete` is set, the
    corresponding deletion routine runs and the amount of space freed is
    printed.
    """
    print()
    if args is None:
        args = get_args()
    s = [
        ch.rjust(i + j)
        for i in range(1, 25, 3)
        for j, ch in enumerate(["/", "-", "\\"])
    ]
    s += s[::-1]
    with Spinner(s) as spinner:
        with ThreadPoolExecutor() as exc:
            thread = exc.submit(get_duplicates, args.paths)
            while not thread.done():
                spinner.display()
                time.sleep(0.025)
            matches = thread.result()
    if not matches:
        print("No duplicates detected.")
        return
    print(f"Found {len(matches)} duplicate sets of files.")
    if not args.no_show:
        print(griddy(matches))
    if args.delete_dupes or args.autodelete:
        # The namespace has no `path` attribute, and `paths` holds the file
        # list rather than the searched directories, so the freed space is
        # measured from the matched files. Snapshot them first so the result
        # does not depend on whether the deletion routine alters `matches`.
        candidates = [file for match in matches for file in match]
        size = _existing_size(candidates)
        if args.delete_dupes:
            delete_wizard(matches)
        else:
            autodelete(matches)
        deleted_size = size - _existing_size(candidates)
        print(f"Deleted {Pathier.format_size(deleted_size)}.")