dupechecker.dupechecker
import argparse
import filecmp
import time
from concurrent.futures import ThreadPoolExecutor

from griddle import griddy
from pathier import Pathier
from printbuddies import Spinner


def get_duplicates(path: Pathier, recursive: bool = False) -> list[list[Pathier]]:
    """Return a list of lists for duplicate files in `path`.

    Each sub-list will contain 2 or more files determined to be equivalent files.
    If `recursive` is `True`, files from `path` and its subdirectories will be compared.

    Only regular files matching the `*.*` glob pattern are considered."""
    candidates = path.rglob("*.*") if recursive else path.glob("*.*")
    # Files of different sizes can never be equal, so bucket by size first
    # and only run byte-for-byte comparisons inside each bucket.
    by_size: dict[int, list[Pathier]] = {}
    for file in candidates:
        if file.is_file():
            by_size.setdefault(file.stat().st_size, []).append(file)
    matching_sets = []
    for remaining in by_size.values():
        while len(remaining) > 1:
            comparee, *others = remaining
            matching_files = [comparee]
            remaining = []
            for other in others:
                if filecmp.cmp(comparee, other, shallow=False):
                    matching_files.append(other)
                else:
                    # Not equal to `comparee`, but may still match a later file.
                    remaining.append(other)
            if len(matching_files) > 1:
                matching_sets.append(matching_files)
    return matching_sets


def get_args() -> argparse.Namespace:
    """Parse the command line arguments for the duplicate checker.

    The returned namespace has the boolean flags `recursive`, `delete_dupes`,
    `autodelete`, and `no_show`, plus `path`, which is always a `Pathier`
    (the current working directory when no path is given)."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help=""" Glob files to compare recursively. """,
    )

    parser.add_argument(
        "-d",
        "--delete_dupes",
        action="store_true",
        help=""" After finding duplicates, delete all but one copy.
        For each set of duplicates, the tool will ask you to enter the number corresponding to the copy you want to keep.
        Pressing 'enter' without entering a number will skip that set without deleting anything.""",
    )

    parser.add_argument(
        "-ad",
        "--autodelete",
        action="store_true",
        help=""" Automatically decide which file to keep and which to delete from each set of duplicate files instead of asking which to keep. """,
    )

    parser.add_argument(
        "-ns",
        "--no_show",
        action="store_true",
        help=""" Don't show printout of matching files. """,
    )

    # `type` converts a user-supplied string; the non-string default is used as is.
    parser.add_argument(
        "path",
        type=Pathier,
        default=Pathier.cwd(),
        nargs="?",
        help=""" The path to compare files in. """,
    )

    return parser.parse_args()


def delete_wizard(matches: list[list[Pathier]]):
    """Ask which file to keep for each set and delete the rest of that set.

    A blank answer skips the set without deleting anything.
    An answer that is not one of the listed numbers is rejected and the
    prompt is repeated, so a typo can never delete every copy."""
    print("Enter the corresponding number of the file to keep.")
    print(
        "Press 'Enter' without giving a number to skip deleting any files for the given set."
    )
    for match in matches:
        options = {str(i): file for i, file in enumerate(match, 1)}
        prompt = " | ".join(f"({i})<->{file}" for i, file in options.items()) + " "
        keeper = input(prompt).strip()
        while keeper and keeper not in options:
            print(f"'{keeper}' is not one of the listed numbers.")
            keeper = input(prompt).strip()
        if not keeper:
            continue
        for num, file in options.items():
            if num != keeper:
                file.delete()


def autodelete(matches: list[list[Pathier]]):
    """Keep one of each set in `matches` and delete the others.

    The last file of each set is the one kept.
    `matches` and its sub-lists are not modified."""
    for match in matches:
        for file in match[:-1]:
            file.delete()


def dupechecker(args: argparse.Namespace | None = None):
    """Find duplicate files and optionally delete the extra copies.

    If `args` is not given, the command line is parsed with `get_args()`.
    A spinner is displayed while the search runs in a worker thread."""
    if not args:
        args = get_args()
    frames = []
    for offset in range(1, 25, 3):
        for shift, glyph in enumerate(["/", "-", "\\"]):
            frames.append(glyph.rjust(offset + shift))
    # Play the frames forward then backward so the animation sweeps both ways.
    frames = frames + frames[::-1]
    with Spinner(frames) as spinner:
        with ThreadPoolExecutor() as exc:
            thread = exc.submit(get_duplicates, args.path, args.recursive)
            while not thread.done():
                spinner.display()
                time.sleep(0.025)
    matches = thread.result()
    if not matches:
        print("No duplicates detected.")
        return
    print(f"Found {len(matches)} duplicate sets of files.")
    if not args.no_show:
        print(griddy(matches))
    if args.delete_dupes or args.autodelete:
        size = args.path.size()
        if args.delete_dupes:
            delete_wizard(matches)
        else:
            autodelete(matches)
        deleted_size = size - args.path.size()
        print(f"Deleted {Pathier.format_size(deleted_size)}.")


if __name__ == "__main__":
    dupechecker(get_args())
def get_duplicates(path: pathier.pathier.Pathier, recursive: bool = False) -> list[list[pathier.pathier.Pathier]]:
def get_duplicates(path: Pathier, recursive: bool = False) -> list[list[Pathier]]:
    """Return a list of lists for duplicate files in `path`.

    Each sub-list will contain 2 or more files determined to be equivalent files.
    If `recursive` is `True`, files from `path` and its subdirectories will be compared.

    Only regular files matching the `*.*` glob pattern are considered."""
    candidates = path.rglob("*.*") if recursive else path.glob("*.*")
    # Files of different sizes can never be equal, so bucket by size first
    # and only run byte-for-byte comparisons inside each bucket.
    by_size: dict[int, list[Pathier]] = {}
    for file in candidates:
        if file.is_file():
            by_size.setdefault(file.stat().st_size, []).append(file)
    matching_sets = []
    for remaining in by_size.values():
        while len(remaining) > 1:
            comparee, *others = remaining
            matching_files = [comparee]
            remaining = []
            for other in others:
                if filecmp.cmp(comparee, other, shallow=False):
                    matching_files.append(other)
                else:
                    # Not equal to `comparee`, but may still match a later file.
                    remaining.append(other)
            if len(matching_files) > 1:
                matching_sets.append(matching_files)
    return matching_sets
Return a list of lists for duplicate files in `path`.
Each sub-list will contain 2 or more files determined to be equivalent files.
If `recursive` is `True`, files from `path` and its subdirectories will be compared.
def get_args() -> argparse.Namespace:
def get_args() -> argparse.Namespace:
    """Parse the command line arguments for the duplicate checker.

    The returned namespace has the boolean flags `recursive`, `delete_dupes`,
    `autodelete`, and `no_show`, plus `path`, which is always a `Pathier`
    (the current working directory when no path is given)."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help=""" Glob files to compare recursively. """,
    )

    parser.add_argument(
        "-d",
        "--delete_dupes",
        action="store_true",
        help=""" After finding duplicates, delete all but one copy.
        For each set of duplicates, the tool will ask you to enter the number corresponding to the copy you want to keep.
        Pressing 'enter' without entering a number will skip that set without deleting anything.""",
    )

    parser.add_argument(
        "-ad",
        "--autodelete",
        action="store_true",
        help=""" Automatically decide which file to keep and which to delete from each set of duplicate files instead of asking which to keep. """,
    )

    parser.add_argument(
        "-ns",
        "--no_show",
        action="store_true",
        help=""" Don't show printout of matching files. """,
    )

    # `type` converts a user-supplied string; the non-string default is used as is,
    # so no comparison against the working directory is needed after parsing.
    parser.add_argument(
        "path",
        type=Pathier,
        default=Pathier.cwd(),
        nargs="?",
        help=""" The path to compare files in. """,
    )

    return parser.parse_args()
def delete_wizard(matches: list[list[pathier.pathier.Pathier]]):
def delete_wizard(matches: list[list[Pathier]]):
    """Ask which file to keep for each set and delete the rest of that set.

    A blank answer skips the set without deleting anything.
    An answer that is not one of the listed numbers is rejected and the
    prompt is repeated, so a typo can never delete every copy."""
    print("Enter the corresponding number of the file to keep.")
    print(
        "Press 'Enter' without giving a number to skip deleting any files for the given set."
    )
    for match in matches:
        options = {str(i): file for i, file in enumerate(match, 1)}
        prompt = " | ".join(f"({i})<->{file}" for i, file in options.items()) + " "
        keeper = input(prompt).strip()
        # Without this check an unknown answer would compare unequal to every
        # key and all files in the set would be deleted.
        while keeper and keeper not in options:
            print(f"'{keeper}' is not one of the listed numbers.")
            keeper = input(prompt).strip()
        if not keeper:
            continue
        for num, file in options.items():
            if num != keeper:
                file.delete()
Ask which file to keep for each set.
def autodelete(matches: list[list[pathier.pathier.Pathier]]):
def autodelete(matches: list[list[Pathier]]):
    """Keep one of each set in `matches` and delete the others.

    The last file of each set is the one kept.
    `matches` and its sub-lists are not modified."""
    for match in matches:
        # Slice instead of `pop()` so the caller's lists stay intact.
        for file in match[:-1]:
            file.delete()
Keep one of each set in `matches` and delete the others.
def dupechecker(args: argparse.Namespace | None = None):
def dupechecker(args: argparse.Namespace | None = None):
    """Find duplicate files and optionally delete the extra copies.

    If `args` is not given, the command line is parsed with `get_args()`.
    A spinner is displayed while the search runs in a worker thread."""
    if not args:
        args = get_args()
    frames = []
    for offset in range(1, 25, 3):
        for shift, glyph in enumerate(["/", "-", "\\"]):
            frames.append(glyph.rjust(offset + shift))
    # Play the frames forward then backward so the animation sweeps both ways.
    frames = frames + frames[::-1]
    with Spinner(frames) as spinner:
        with ThreadPoolExecutor() as exc:
            thread = exc.submit(get_duplicates, args.path, args.recursive)
            while not thread.done():
                spinner.display()
                time.sleep(0.025)
    matches = thread.result()
    if not matches:
        print("No duplicates detected.")
        return
    print(f"Found {len(matches)} duplicate sets of files.")
    if not args.no_show:
        print(griddy(matches))
    if args.delete_dupes or args.autodelete:
        size = args.path.size()
        if args.delete_dupes:
            delete_wizard(matches)
        else:
            autodelete(matches)
        deleted_size = size - args.path.size()
        print(f"Deleted {Pathier.format_size(deleted_size)}.")