dupechecker.dupechecker
"""Find, list and optionally delete duplicate files in a directory."""

import argparse
import filecmp
import time
from concurrent.futures import ThreadPoolExecutor

from griddle import griddy
from pathier import Pathier
from printbuddies import Spinner


def get_duplicates(path: Pathier, recursive: bool = False) -> list[list[Pathier]]:
    """Return a list of lists for duplicate files in `path`.

    Each sub-list will contain 2 or more files whose contents are identical.
    If `recursive` is `True`, files from `path` and its subdirectories will be compared.

    Every regular file is considered, including files without a `.` in their name.
    Directories are never compared."""
    candidates = path.rglob("*") if recursive else path.glob("*")
    # Stat each file once; files of different sizes can never be equal,
    # so the size check avoids opening (or re-stat'ing) most pairs.
    remaining = [(file, file.stat().st_size) for file in candidates if file.is_file()]
    matching_sets = []
    while remaining:
        comparee, comparee_size = remaining.pop()
        duplicates = []
        unmatched = []
        for entry in remaining:
            file, size = entry
            if size == comparee_size and filecmp.cmp(comparee, file, shallow=False):
                duplicates.append(file)
            else:
                unmatched.append(entry)
        if duplicates:
            matching_sets.append([comparee, *duplicates])
        # Files already assigned to a set are not compared again.
        remaining = unmatched
    return matching_sets


def get_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command line arguments.

    `argv` is the list of arguments to parse; when `None`, `sys.argv[1:]` is used.
    The returned namespace has `recursive`, `delete_dupes`, `autodelete`, `no_show`
    and `path` (a `Pathier`, defaulting to the current working directory)."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help=""" Glob files to compare recursively. """,
    )

    parser.add_argument(
        "-d",
        "--delete_dupes",
        action="store_true",
        help=""" After finding duplicates, delete all but one copy.
        For each set of duplicates, the tool will ask you to enter the number corresponding to the copy you want to keep.
        Pressing 'enter' without entering a number will skip that set without deleting anything.""",
    )

    parser.add_argument(
        "-ad",
        "--autodelete",
        action="store_true",
        help=""" Automatically decide which file to keep and which to delete from each set of duplicate files instead of asking which to keep. """,
    )

    parser.add_argument(
        "-ns",
        "--no_show",
        action="store_true",
        help=""" Don't show printout of matching files. """,
    )

    # argparse only applies `type` to values given as strings,
    # so the `Pathier` default is used as-is.
    parser.add_argument(
        "path",
        type=Pathier,
        default=Pathier.cwd(),
        nargs="?",
        help=""" The path to compare files in. """,
    )

    return parser.parse_args(argv)


def delete_wizard(matches: list[list[Pathier]]) -> None:
    """Ask which file to keep for each set.

    For every set in `matches` the files are listed with a number and the user is prompted.
    An empty answer skips the set without deleting anything.
    An answer that isn't one of the listed numbers is rejected and the prompt is repeated.
    Otherwise `delete()` is called on every file in the set except the chosen one."""
    print()
    print("Enter the corresponding number of the file to keep.")
    print(
        "Press 'Enter' without giving a number to skip deleting any files for the given set."
    )
    print()
    for match in matches:
        map_ = {str(i): file for i, file in enumerate(match, 1)}
        options = "\n".join(f"({i}) {file}" for i, file in map_.items()) + "\n"
        print(options)
        prompt = f"Enter number of file to keep ({', '.join(map_)}): "
        keeper = input(prompt).strip()
        # Never delete on an unrecognised answer: without this check a typo
        # would match no key and every file in the set would be deleted.
        while keeper and keeper not in map_:
            print(f"'{keeper}' is not one of the listed numbers.")
            keeper = input(prompt).strip()
        if not keeper:
            continue
        for num, file in map_.items():
            if num != keeper:
                file.delete()


def autodelete(matches: list[list[Pathier]]) -> None:
    """Keep one of each set in `matches` and delete the others.

    The last file of each set is kept. `matches` itself is not modified."""
    for match in matches:
        # Slice instead of `pop()` so the caller's lists stay intact
        # and an empty set is simply a no-op.
        for file in match[:-1]:
            file.delete()


def dupechecker(args: argparse.Namespace | None = None):
    """Search `args.path` for duplicate files, report them and optionally delete the extras.

    If `args` is `None`, the arguments are read from the command line with `get_args()`."""
    print()
    if args is None:
        args = get_args()
    frames = [
        ch.rjust(i + j)
        for i in range(1, 25, 3)
        for j, ch in enumerate(["/", "-", "\\"])
    ]
    frames += frames[::-1]
    # Run the scan in a worker thread so the spinner can keep updating.
    with Spinner(frames) as spinner, ThreadPoolExecutor() as pool:
        future = pool.submit(get_duplicates, args.path, args.recursive)
        while not future.done():
            spinner.display()
            time.sleep(0.025)
        matches = future.result()
    if not matches:
        print("No duplicates detected.")
        return
    print(f"Found {len(matches)} duplicate sets of files.")
    if not args.no_show:
        print(griddy(matches))
    if args.delete_dupes or args.autodelete:
        size = args.path.size()
        if args.delete_dupes:
            delete_wizard(matches)
        else:
            autodelete(matches)
        deleted_size = size - args.path.size()
        print(f"Deleted {Pathier.format_size(deleted_size)}.")


if __name__ == "__main__":
    dupechecker(get_args())
def get_duplicates(path: pathier.pathier.Pathier, recursive: bool = False) -> list[list[pathier.pathier.Pathier]]:
def get_duplicates(path: Pathier, recursive: bool = False) -> list[list[Pathier]]:
    """Return a list of lists for duplicate files in `path`.

    Each sub-list will contain 2 or more files whose contents are identical.
    If `recursive` is `True`, files from `path` and its subdirectories will be compared.

    Every regular file is considered, including files without a `.` in their name.
    Directories are never compared."""
    candidates = path.rglob("*") if recursive else path.glob("*")
    # Stat each file once; files of different sizes can never be equal,
    # so the size check avoids opening (or re-stat'ing) most pairs.
    remaining = [(file, file.stat().st_size) for file in candidates if file.is_file()]
    matching_sets = []
    while remaining:
        # Take from the end, like the original, so result ordering is unchanged.
        comparee, comparee_size = remaining.pop()
        duplicates = []
        unmatched = []
        for entry in remaining:
            file, size = entry
            if size == comparee_size and filecmp.cmp(comparee, file, shallow=False):
                duplicates.append(file)
            else:
                unmatched.append(entry)
        if duplicates:
            matching_sets.append([comparee, *duplicates])
        # Files already assigned to a set are not compared again.
        remaining = unmatched
    return matching_sets
Return a list of lists for duplicate files in `path`.
Each sub-list will contain 2 or more files determined to be equivalent files.
If `recursive` is `True`, files from `path` and its subdirectories will be compared.
def
get_args() -> argparse.Namespace:
def get_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command line arguments.

    `argv` is the list of arguments to parse; when `None`, `sys.argv[1:]` is used.
    The returned namespace has `recursive`, `delete_dupes`, `autodelete`, `no_show`
    and `path` (a `Pathier`, defaulting to the current working directory)."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help=""" Glob files to compare recursively. """,
    )

    parser.add_argument(
        "-d",
        "--delete_dupes",
        action="store_true",
        help=""" After finding duplicates, delete all but one copy.
        For each set of duplicates, the tool will ask you to enter the number corresponding to the copy you want to keep.
        Pressing 'enter' without entering a number will skip that set without deleting anything.""",
    )

    parser.add_argument(
        "-ad",
        "--autodelete",
        action="store_true",
        help=""" Automatically decide which file to keep and which to delete from each set of duplicate files instead of asking which to keep. """,
    )

    parser.add_argument(
        "-ns",
        "--no_show",
        action="store_true",
        help=""" Don't show printout of matching files. """,
    )

    # argparse only applies `type` to values given as strings,
    # so the `Pathier` default is used as-is and no post-parse conversion is needed.
    parser.add_argument(
        "path",
        type=Pathier,
        default=Pathier.cwd(),
        nargs="?",
        help=""" The path to compare files in. """,
    )

    return parser.parse_args(argv)
def
delete_wizard(matches: list[list[pathier.pathier.Pathier]]):
def delete_wizard(matches: list[list[Pathier]]) -> None:
    """Ask which file to keep for each set.

    For every set in `matches` the files are listed with a number and the user is prompted.
    An empty answer skips the set without deleting anything.
    An answer that isn't one of the listed numbers is rejected and the prompt is repeated.
    Otherwise `delete()` is called on every file in the set except the chosen one."""
    print()
    print("Enter the corresponding number of the file to keep.")
    print(
        "Press 'Enter' without giving a number to skip deleting any files for the given set."
    )
    print()
    for match in matches:
        map_ = {str(i): file for i, file in enumerate(match, 1)}
        options = "\n".join(f"({i}) {file}" for i, file in map_.items()) + "\n"
        print(options)
        prompt = f"Enter number of file to keep ({', '.join(map_)}): "
        keeper = input(prompt).strip()
        # Never delete on an unrecognised answer: without this check a typo
        # would match no key and every file in the set would be deleted.
        while keeper and keeper not in map_:
            print(f"'{keeper}' is not one of the listed numbers.")
            keeper = input(prompt).strip()
        if not keeper:
            continue
        for num, file in map_.items():
            if num != keeper:
                file.delete()
Ask which file to keep for each set.
def
autodelete(matches: list[list[pathier.pathier.Pathier]]):
def autodelete(matches: list[list[Pathier]]) -> None:
    """Keep one of each set in `matches` and delete the others.

    The last file of each set is kept. `matches` itself is not modified."""
    for match in matches:
        # Slice instead of `pop()` so the caller's lists stay intact
        # and an empty set is simply a no-op.
        for file in match[:-1]:
            file.delete()
Keep one of each set in `matches` and delete the others.
def
dupechecker(args: argparse.Namespace | None = None):
def dupechecker(args: argparse.Namespace | None = None):
    """Search `args.path` for duplicate files, report them and optionally delete the extras.

    When `args` is not given, the arguments are read with `get_args()`.
    The scan runs in a worker thread while a spinner is displayed.
    Unless `args.no_show` is set, the duplicate sets are printed.
    With `args.delete_dupes` the user is asked which copy to keep; otherwise,
    with `args.autodelete`, one copy per set is kept automatically.
    After deleting, the difference in size of `args.path` is printed."""
    print()
    if not args:
        args = get_args()

    # Spinner frames: the three symbols pushed progressively to the right,
    # followed by the same frames in reverse.
    frames = []
    for offset in range(1, 25, 3):
        for step, symbol in enumerate(["/", "-", "\\"]):
            frames.append(symbol.rjust(offset + step))
    frames.extend(frames[::-1])

    with Spinner(frames) as spinner, ThreadPoolExecutor() as pool:
        future = pool.submit(get_duplicates, args.path, args.recursive)
        while not future.done():
            spinner.display()
            time.sleep(0.025)
        matches = future.result()

    if not matches:
        print("No duplicates detected.")
        return

    print(f"Found {len(matches)} duplicate sets of files.")
    if not args.no_show:
        print(griddy(matches))
    if not (args.delete_dupes or args.autodelete):
        return

    size_before = args.path.size()
    if args.delete_dupes:
        delete_wizard(matches)
    else:
        autodelete(matches)
    reclaimed = size_before - args.path.size()
    print(f"Deleted {Pathier.format_size(reclaimed)}.")