dupechecker.dupechecker
"""Find byte-identical files in one or more directories and optionally delete the extras."""

import argparse
import filecmp
import os
import time
from concurrent.futures import ThreadPoolExecutor
from copy import deepcopy

from griddle import griddy
from pathier import Pathier
from printbuddies import Spinner
from younotyou import younotyou


def get_duplicates(paths: list[Pathier]) -> list[list[Pathier]]:
    """Return a list of lists for duplicate files in `paths`.

    Files are duplicates when `filecmp.cmp(..., shallow=False)` reports
    their contents as identical.

    Args:
        paths: Files to compare. The list is read, never modified.

    Returns:
        One list per set of identical files; files without a duplicate are
        omitted. Sets are ordered by the position of their last member in
        `paths` (latest first). Inside a set that last member comes first,
        followed by the remaining members in input order.

    Entries that point at a file already seen (same resolved location) and
    entries that cannot be stat'ed are skipped.
    """
    # Only files of equal size can be identical, so bucket by size first and
    # run the expensive content comparison inside each bucket only.
    buckets: dict[int, list[tuple[int, Pathier]]] = {}
    seen: set[str] = set()
    for position, path in enumerate(paths):
        identity = os.path.normcase(os.path.realpath(path))
        if identity in seen:
            # The same file listed twice would otherwise match itself and be
            # reported as a "duplicate" whose deletion removes the only copy.
            continue
        seen.add(identity)
        try:
            size = os.path.getsize(path)
        except OSError:
            # Broken symlink or a file that vanished since it was listed.
            continue
        buckets.setdefault(size, []).append((position, path))

    found: list[tuple[int, list[Pathier]]] = []
    for candidates in buckets.values():
        while len(candidates) > 1:
            position, comparee = candidates.pop()
            twins: list[Pathier] = []
            remaining: list[tuple[int, Pathier]] = []
            for entry in candidates:
                if filecmp.cmp(comparee, entry[1], shallow=False):
                    twins.append(entry[1])
                else:
                    remaining.append(entry)
            candidates = remaining
            if twins:
                found.append((position, [comparee, *twins]))

    # Latest comparee first, so `autodelete` still keeps the same files.
    found.sort(key=lambda item: item[0], reverse=True)
    return [group for _, group in found]


def get_args() -> argparse.Namespace:
    """Parse the command line and expand the given paths into files to compare.

    Returns:
        The parsed namespace. `paths` is replaced by the list of `Pathier`
        files found under the given directories (matching `*.*`, searched
        recursively with `-r`) after the `--ignores` patterns are applied.
        Directories are left out and a file reachable through more than one
        of the given paths is listed once.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help=""" Glob files to compare recursively. """,
    )

    parser.add_argument(
        "-i",
        "--ignores",
        type=str,
        nargs="*",
        default=[],
        help=""" Ignore files matching these patterns.
        e.g. `dupechecker -i *.wav` will compare all files in the current working directory except .wav files.""",
    )

    parser.add_argument(
        "-d",
        "--delete_dupes",
        action="store_true",
        help=""" After finding duplicates, delete all but one copy.
        For each set of duplicates, the tool will ask you to enter the number corresponding to the copy you want to keep.
        Pressing 'enter' without entering a number will skip that set without deleting anything.""",
    )

    parser.add_argument(
        "-ad",
        "--autodelete",
        action="store_true",
        help=""" Automatically decide which file to keep and which to delete from each set of duplicate files instead of asking which to keep. """,
    )

    parser.add_argument(
        "-ns",
        "--no_show",
        action="store_true",
        help=""" Don't show printout of matching files. """,
    )

    parser.add_argument(
        "paths",
        type=str,
        default=[Pathier.cwd()],
        nargs="*",
        help=""" The paths to compare files in. """,
    )

    args = parser.parse_args()
    roots = [Pathier(path) for path in args.paths]

    print("Gathering files...")
    candidates: list[str] = []
    seen = set()
    for root in roots:
        found = root.rglob("*.*") if args.recursive else root.glob("*.*")
        for file in found:
            # "*.*" also matches directories with a dot in their name.
            if not file.is_file():
                continue
            # Overlapping roots yield the same file more than once; a file
            # compared with itself would be reported as its own duplicate.
            identity = file.resolve()
            if identity in seen:
                continue
            seen.add(identity)
            candidates.append(str(file))

    args.paths = [
        Pathier(path)
        for path in younotyou(candidates, exclude_patterns=args.ignores)
    ]
    print(f"Comparing {len(args.paths)} files...")

    return args


def delete_wizard(matches: list[list[Pathier]]):
    """Ask which file to keep for each set.

    For every set in `matches` the files are listed with a 1-based number.
    Entering a listed number deletes every other file of that set; pressing
    Enter skips the set. Any other answer is rejected and asked again.
    """
    print()
    print("Enter the corresponding number of the file to keep.")
    print(
        "Press 'Enter' without giving a number to skip deleting any files for the given set."
    )
    print()
    for match in matches:
        map_ = {str(i): file for i, file in enumerate(match, 1)}
        options = "\n".join(f"({i}) {file}" for i, file in map_.items()) + "\n"
        print(options)
        prompt = f"Enter number of file to keep ({', '.join(map_)}): "
        keeper = input(prompt).strip()
        # An answer that is not a listed number must never reach the delete
        # loop: it would differ from every key and delete the whole set.
        while keeper and keeper not in map_:
            print(f"'{keeper}' is not one of the listed numbers.")
            keeper = input(prompt).strip()
        if keeper:
            for num, file in map_.items():
                if num != keeper:
                    file.delete()
        print()


def autodelete(matches: list[list[Pathier]]):
    """Keep one of each set in `matches` and delete the others.

    The last file of every set is kept. `matches` and its inner lists are
    not modified, and an empty set is skipped.
    """
    for match in matches:
        # Slice rather than `pop()` so the caller's lists stay intact.
        for file in match[:-1]:
            file.delete()


def dupechecker(args: argparse.Namespace | None = None):
    """Find duplicate files, report them, and optionally delete the extras.

    Args:
        args: Parsed options as produced by `get_args()`. When omitted, the
            command line is parsed.
    """
    print()
    if args is None:
        args = get_args()

    # Spinner frames: a rotating bar that walks right, then back again.
    frames = [
        ch.rjust(i + j)
        for i in range(1, 25, 3)
        for j, ch in enumerate(["/", "-", "\\"])
    ]
    frames += frames[::-1]

    with Spinner(frames) as spinner:
        with ThreadPoolExecutor() as exc:
            # The worker gets its own copy so `args.paths` stays intact for
            # the size accounting below.
            thread = exc.submit(get_duplicates, deepcopy(args.paths))
            while not thread.done():
                spinner.display()
                time.sleep(0.025)
            matches = thread.result()

    if not matches:
        print("No duplicates detected.")
        return

    print(f"Found {len(matches)} duplicate sets of files.")
    if not args.no_show:
        print(
            griddy(
                [["\n".join([str(file) for file in match])] for match in matches]
            )
        )

    if args.delete_dupes or args.autodelete:

        def total_size() -> int:
            return sum(path.size() for path in args.paths)  # type: ignore

        start_size = total_size()
        # The interactive wizard wins when both flags are given.
        if args.delete_dupes:
            delete_wizard(matches)
        else:
            autodelete(matches)
        deleted_size = start_size - total_size()
        print(f"Deleted {Pathier.format_size(deleted_size)}.")


if __name__ == "__main__":
    dupechecker(get_args())
def get_duplicates(paths: list[pathier.pathier.Pathier]) -> list[list[pathier.pathier.Pathier]]:
def get_duplicates(paths: list[Pathier]) -> list[list[Pathier]]:
    """Return a list of lists for duplicate files in `paths`.

    Files are duplicates when `filecmp.cmp(..., shallow=False)` reports
    their contents as identical.

    Args:
        paths: Files to compare. The list is read, never modified.

    Returns:
        One list per set of identical files; files without a duplicate are
        omitted. Sets are ordered by the position of their last member in
        `paths` (latest first). Inside a set that last member comes first,
        followed by the remaining members in input order.

    Entries that point at a file already seen (same resolved location) and
    entries that cannot be stat'ed are skipped.
    """
    import os  # function-scope: `os` is not among the module-level imports

    # Only files of equal size can be identical, so bucket by size first and
    # run the expensive content comparison inside each bucket only.
    buckets: dict[int, list[tuple[int, Pathier]]] = {}
    seen: set[str] = set()
    for position, path in enumerate(paths):
        identity = os.path.normcase(os.path.realpath(path))
        if identity in seen:
            # The same file listed twice would otherwise match itself and be
            # reported as a "duplicate" whose deletion removes the only copy.
            continue
        seen.add(identity)
        try:
            size = os.path.getsize(path)
        except OSError:
            # Broken symlink or a file that vanished since it was listed.
            continue
        buckets.setdefault(size, []).append((position, path))

    found: list[tuple[int, list[Pathier]]] = []
    for candidates in buckets.values():
        while len(candidates) > 1:
            position, comparee = candidates.pop()
            twins: list[Pathier] = []
            remaining: list[tuple[int, Pathier]] = []
            for entry in candidates:
                if filecmp.cmp(comparee, entry[1], shallow=False):
                    twins.append(entry[1])
                else:
                    remaining.append(entry)
            candidates = remaining
            if twins:
                found.append((position, [comparee, *twins]))

    # Latest comparee first, so `autodelete` still keeps the same files.
    found.sort(key=lambda item: item[0], reverse=True)
    return [group for _, group in found]
Return a list of lists for duplicate files in `paths`.
def get_args() -> argparse.Namespace:
def get_args() -> argparse.Namespace:
    """Parse the command line and expand the given paths into files to compare.

    Returns:
        The parsed namespace. `paths` is replaced by the list of `Pathier`
        files found under the given directories (matching `*.*`, searched
        recursively with `-r`) after the `--ignores` patterns are applied.
        Directories are left out and a file reachable through more than one
        of the given paths is listed once.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help=""" Glob files to compare recursively. """,
    )

    parser.add_argument(
        "-i",
        "--ignores",
        type=str,
        nargs="*",
        default=[],
        help=""" Ignore files matching these patterns.
        e.g. `dupechecker -i *.wav` will compare all files in the current working directory except .wav files.""",
    )

    parser.add_argument(
        "-d",
        "--delete_dupes",
        action="store_true",
        help=""" After finding duplicates, delete all but one copy.
        For each set of duplicates, the tool will ask you to enter the number corresponding to the copy you want to keep.
        Pressing 'enter' without entering a number will skip that set without deleting anything.""",
    )

    parser.add_argument(
        "-ad",
        "--autodelete",
        action="store_true",
        help=""" Automatically decide which file to keep and which to delete from each set of duplicate files instead of asking which to keep. """,
    )

    parser.add_argument(
        "-ns",
        "--no_show",
        action="store_true",
        help=""" Don't show printout of matching files. """,
    )

    parser.add_argument(
        "paths",
        type=str,
        default=[Pathier.cwd()],
        nargs="*",
        help=""" The paths to compare files in. """,
    )

    args = parser.parse_args()
    # Converting unconditionally is safe: the default is already a Pathier.
    roots = [Pathier(path) for path in args.paths]

    print("Gathering files...")
    candidates: list[str] = []
    seen = set()
    for root in roots:
        found = root.rglob("*.*") if args.recursive else root.glob("*.*")
        for file in found:
            # "*.*" also matches directories with a dot in their name.
            if not file.is_file():
                continue
            # Overlapping roots yield the same file more than once; a file
            # compared with itself would be reported as its own duplicate.
            identity = file.resolve()
            if identity in seen:
                continue
            seen.add(identity)
            candidates.append(str(file))

    args.paths = [
        Pathier(path)
        for path in younotyou(candidates, exclude_patterns=args.ignores)
    ]
    print(f"Comparing {len(args.paths)} files...")

    return args
def delete_wizard(matches: list[list[pathier.pathier.Pathier]]):
def delete_wizard(matches: list[list[Pathier]]):
    """Ask which file to keep for each set.

    For every set in `matches` the files are listed with a 1-based number.
    Entering a listed number deletes every other file of that set; pressing
    Enter skips the set. Any other answer is rejected and asked again.
    """
    print()
    print("Enter the corresponding number of the file to keep.")
    print(
        "Press 'Enter' without giving a number to skip deleting any files for the given set."
    )
    print()
    for match in matches:
        map_ = {str(i): file for i, file in enumerate(match, 1)}
        options = "\n".join(f"({i}) {file}" for i, file in map_.items()) + "\n"
        print(options)
        prompt = f"Enter number of file to keep ({', '.join(map_)}): "
        keeper = input(prompt).strip()
        # An answer that is not a listed number must never reach the delete
        # loop: it would differ from every key and delete the whole set.
        while keeper and keeper not in map_:
            print(f"'{keeper}' is not one of the listed numbers.")
            keeper = input(prompt).strip()
        if keeper:
            for num, file in map_.items():
                if num != keeper:
                    file.delete()
        print()
Ask which file to keep for each set.
def autodelete(matches: list[list[pathier.pathier.Pathier]]):
def autodelete(matches: list[list[Pathier]]):
    """Keep one of each set in `matches` and delete the others.

    The last file of every set is kept. `matches` and its inner lists are
    not modified, and an empty set is skipped.
    """
    for match in matches:
        # Slice rather than `pop()` so the caller's lists stay intact.
        for file in match[:-1]:
            file.delete()
Keep one of each set in `matches` and delete the others.
def dupechecker(args: argparse.Namespace | None = None):
def dupechecker(args: argparse.Namespace | None = None):
    """Find duplicate files, report them, and optionally delete the extras.

    Args:
        args: Parsed options as produced by `get_args()`. When omitted, the
            command line is parsed.
    """
    print()
    if args is None:
        args = get_args()

    # Spinner frames: a rotating bar that walks right, then back again.
    frames = [
        ch.rjust(i + j)
        for i in range(1, 25, 3)
        for j, ch in enumerate(["/", "-", "\\"])
    ]
    frames += frames[::-1]

    with Spinner(frames) as spinner:
        with ThreadPoolExecutor() as exc:
            # The worker gets its own copy so `args.paths` stays intact for
            # the size accounting below.
            thread = exc.submit(get_duplicates, deepcopy(args.paths))
            while not thread.done():
                spinner.display()
                time.sleep(0.025)
            matches = thread.result()

    if not matches:
        print("No duplicates detected.")
        return

    print(f"Found {len(matches)} duplicate sets of files.")
    if not args.no_show:
        print(
            griddy(
                [["\n".join([str(file) for file in match])] for match in matches]
            )
        )

    if args.delete_dupes or args.autodelete:

        def total_size() -> int:
            return sum(path.size() for path in args.paths)  # type: ignore

        start_size = total_size()
        # The interactive wizard wins when both flags are given.
        if args.delete_dupes:
            delete_wizard(matches)
        else:
            autodelete(matches)
        deleted_size = start_size - total_size()
        print(f"Deleted {Pathier.format_size(deleted_size)}.")