dupechecker.dupechecker

  1import argparse
  2import filecmp
  3import time
  4from concurrent.futures import ThreadPoolExecutor
  5
  6from griddle import griddy
  7from pathier import Pathier
  8from printbuddies import Spinner
  9
 10
 11def get_duplicates(path: Pathier, recursive: bool = False) -> list[list[Pathier]]:
 12    """Return a list of lists for duplicate files in `path`.
 13    Each sub-list will contain 2 or more files determined to be equivalent files.
 14    If `recursive` is `True`, files from `path` and it's subdirectories will be compared."""
 15    files = list(path.rglob("*.*")) if recursive else list(path.glob("*.*"))
 16    matching_sets = []
 17    while len(files) > 0:
 18        comparee = files.pop()
 19        matching_files = [file for file in files if filecmp.cmp(comparee, file, False)]
 20        if matching_files:
 21            [files.pop(files.index(file)) for file in matching_files]
 22            matching_files.insert(0, comparee)
 23            matching_sets.append(matching_files)
 24    return matching_sets
 25
 26
 27def get_args() -> argparse.Namespace:
 28    parser = argparse.ArgumentParser()
 29
 30    parser.add_argument(
 31        "-r",
 32        "--recursive",
 33        action="store_true",
 34        help=""" Glob files to compare recursively. """,
 35    )
 36
 37    parser.add_argument(
 38        "-d",
 39        "--delete_dupes",
 40        action="store_true",
 41        help=""" After finding duplicates, delete all but one copy.
 42        For each set of duplicates, the tool will ask you to enter the number corresponding to the copy you want to keep.
 43        Pressing 'enter' without entering a number will skip that set without deleting anything.""",
 44    )
 45
 46    parser.add_argument(
 47        "-ad",
 48        "--autodelete",
 49        action="store_true",
 50        help=""" Automatically decide which file to keep and which to delete from each set of duplicate files instead of asking which to keep. """,
 51    )
 52
 53    parser.add_argument(
 54        "-ns",
 55        "--no_show",
 56        action="store_true",
 57        help=""" Don't show printout of matching files. """,
 58    )
 59
 60    parser.add_argument(
 61        "path",
 62        type=str,
 63        default=Pathier.cwd(),
 64        nargs="?",
 65        help=""" The path to compare files in. """,
 66    )
 67
 68    args = parser.parse_args()
 69    if not args.path == Pathier.cwd():
 70        args.path = Pathier(args.path)
 71
 72    return args
 73
 74
 75def delete_wizard(matches: list[list[Pathier]]):
 76    """Ask which file to keep for each set."""
 77    print("Enter the corresponding number of the file to keep.")
 78    print(
 79        "Press 'Enter' without giving a number to skip deleting any files for the given set."
 80    )
 81    for match in matches:
 82        map_ = {str(i): file for i, file in enumerate(match, 1)}
 83        prompt = " | ".join(f"({i})<->{file}" for i, file in map_.items())
 84        keeper = input(prompt + " ")
 85        if keeper:
 86            [map_[num].delete() for num in map_ if num != keeper]
 87
 88
 89def autodelete(matches: list[list[Pathier]]):
 90    """Keep one of each set in `matches` and delete the others."""
 91    for match in matches:
 92        match.pop()
 93        [file.delete() for file in match]
 94
 95
 96def dupechecker(args: argparse.Namespace | None = None):
 97    if not args:
 98        args = get_args()
 99    s = [
100        ch.rjust(i + j)
101        for i in range(1, 25, 3)
102        for j, ch in enumerate(["/", "-", "\\"])
103    ]
104    s += s[::-1]
105    with Spinner(s) as spinner:
106        with ThreadPoolExecutor() as exc:
107            thread = exc.submit(get_duplicates, args.path, args.recursive)
108            while not thread.done():
109                spinner.display()
110                time.sleep(0.025)
111            matches = thread.result()
112    if matches:
113        print(f"Found {len(matches)} duplicate sets of files.")
114        if not args.no_show:
115            print(griddy(matches))
116        if args.delete_dupes or args.autodelete:
117            size = args.path.size()
118            delete_wizard(matches) if args.delete_dupes else autodelete(matches)
119            deleted_size = size - args.path.size()
120            print(f"Deleted {Pathier.format_size(deleted_size)}.")
121    else:
122        print("No duplicates detected.")
123
124
# Script entry point: read the command line arguments and run the duplicate check.
if __name__ == "__main__":
    cli_args = get_args()
    dupechecker(cli_args)
def get_duplicates(path: pathier.pathier.Pathier, recursive: bool = False) -> list[list[pathier.pathier.Pathier]]:
def get_duplicates(path: Pathier, recursive: bool = False) -> list[list[Pathier]]:
    """Return a list of lists for duplicate files in `path`.

    Each sub-list will contain 2 or more files determined to be equivalent files.
    Files are compared by content (`filecmp.cmp` with `shallow=False`).
    Only entries whose name matches the glob pattern `*.*` are considered.

    If `recursive` is `True`, files from `path` and its subdirectories will be compared.
    """
    globber = path.rglob if recursive else path.glob
    # `*.*` matches on the name only, so it can also yield directories with a dot
    # in their name or dangling symlinks. Keep only entries that are actual files.
    files = [file for file in globber("*.*") if file.is_file()]
    matching_sets = []
    while files:
        # The last remaining file is compared against every other remaining file.
        comparee = files.pop()
        matching_files = []
        remaining = []
        for file in files:
            if filecmp.cmp(comparee, file, shallow=False):
                matching_files.append(file)
            else:
                remaining.append(file)
        if matching_files:
            matching_sets.append([comparee, *matching_files])
        # Files already placed in a set don't need to be compared again.
        files = remaining
    return matching_sets

Return a list of lists for duplicate files in path. Each sub-list will contain 2 or more files determined to be equivalent files. If recursive is True, files from path and its subdirectories will be compared.

def get_args() -> argparse.Namespace:
def get_args() -> argparse.Namespace:
    """Parse the command line arguments for the tool.

    Returns a namespace with the boolean flags `recursive`, `delete_dupes`,
    `autodelete` and `no_show`, plus `path` as a `Pathier`.
    `path` is the current working directory when no path is given."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help=""" Glob files to compare recursively. """,
    )

    parser.add_argument(
        "-d",
        "--delete_dupes",
        action="store_true",
        help=""" After finding duplicates, delete all but one copy.
        For each set of duplicates, the tool will ask you to enter the number corresponding to the copy you want to keep.
        Pressing 'enter' without entering a number will skip that set without deleting anything.""",
    )

    parser.add_argument(
        "-ad",
        "--autodelete",
        action="store_true",
        help=""" Automatically decide which file to keep and which to delete from each set of duplicate files instead of asking which to keep. """,
    )

    parser.add_argument(
        "-ns",
        "--no_show",
        action="store_true",
        help=""" Don't show printout of matching files. """,
    )

    # `type=Pathier` converts a path given on the command line;
    # argparse only applies `type` to string defaults, so the default is used as is.
    parser.add_argument(
        "path",
        type=Pathier,
        default=Pathier.cwd(),
        nargs="?",
        help=""" The path to compare files in. """,
    )

    return parser.parse_args()
def delete_wizard(matches: list[list[pathier.pathier.Pathier]]):
def delete_wizard(matches: list[list[Pathier]]):
    """Ask which file to keep for each set and delete the rest of that set.

    For every set in `matches` the files are listed with 1-based numbers.
    Entering one of those numbers deletes every other file in the set.
    Pressing 'Enter' without giving a number skips the set.
    Any other input is rejected and the prompt is repeated."""
    print("Enter the corresponding number of the file to keep.")
    print(
        "Press 'Enter' without giving a number to skip deleting any files for the given set."
    )
    for match in matches:
        map_ = {str(i): file for i, file in enumerate(match, 1)}
        prompt = " | ".join(f"({i})<->{file}" for i, file in map_.items()) + " "
        keeper = input(prompt).strip()
        # An answer that isn't one of the keys would differ from every key,
        # so acting on it would delete every file in the set. Ask again instead.
        while keeper and keeper not in map_:
            print(f"'{keeper}' is not one of the listed numbers.")
            keeper = input(prompt).strip()
        if not keeper:
            continue
        for num, file in map_.items():
            if num != keeper:
                file.delete()

Ask which file to keep for each set.

def autodelete(matches: list[list[pathier.pathier.Pathier]]):
def autodelete(matches: list[list[Pathier]]):
    """Keep one of each set in `matches` and delete the others.

    The last file of each set is the one that is kept.
    The lists in `matches` are left unmodified."""
    for match in matches:
        # Slice instead of popping so the caller's lists aren't mutated.
        for file in match[:-1]:
            file.delete()

Keep one of each set in matches and delete the others.

def dupechecker(args: argparse.Namespace | None = None):
 97def dupechecker(args: argparse.Namespace | None = None):
 98    if not args:
 99        args = get_args()
100    s = [
101        ch.rjust(i + j)
102        for i in range(1, 25, 3)
103        for j, ch in enumerate(["/", "-", "\\"])
104    ]
105    s += s[::-1]
106    with Spinner(s) as spinner:
107        with ThreadPoolExecutor() as exc:
108            thread = exc.submit(get_duplicates, args.path, args.recursive)
109            while not thread.done():
110                spinner.display()
111                time.sleep(0.025)
112            matches = thread.result()
113    if matches:
114        print(f"Found {len(matches)} duplicate sets of files.")
115        if not args.no_show:
116            print(griddy(matches))
117        if args.delete_dupes or args.autodelete:
118            size = args.path.size()
119            delete_wizard(matches) if args.delete_dupes else autodelete(matches)
120            deleted_size = size - args.path.size()
121            print(f"Deleted {Pathier.format_size(deleted_size)}.")
122    else:
123        print("No duplicates detected.")