dupechecker.dupechecker

  1import argparse
  2import filecmp
  3import time
  4from concurrent.futures import ThreadPoolExecutor
  5
  6from griddle import griddy
  7from pathier import Pathier
  8from printbuddies import Spinner
  9
 10
 11def get_duplicates(path: Pathier, recursive: bool = False) -> list[list[Pathier]]:
 12    """Return a list of lists for duplicate files in `path`.
 13    Each sub-list will contain 2 or more files determined to be equivalent files.
 14    If `recursive` is `True`, files from `path` and it's subdirectories will be compared."""
 15    files = list(path.rglob("*.*")) if recursive else list(path.glob("*.*"))
 16    matching_sets = []
 17    while len(files) > 0:
 18        comparee = files.pop()
 19        matching_files = [file for file in files if filecmp.cmp(comparee, file, False)]
 20        if matching_files:
 21            [files.pop(files.index(file)) for file in matching_files]
 22            matching_files.insert(0, comparee)
 23            matching_sets.append(matching_files)
 24    return matching_sets
 25
 26
 27def get_args() -> argparse.Namespace:
 28    parser = argparse.ArgumentParser()
 29
 30    parser.add_argument(
 31        "-r",
 32        "--recursive",
 33        action="store_true",
 34        help=""" Glob files to compare recursively. """,
 35    )
 36
 37    parser.add_argument(
 38        "-d",
 39        "--delete_dupes",
 40        action="store_true",
 41        help=""" After finding duplicates, delete all but one copy.
 42        For each set of duplicates, the tool will ask you to enter the number corresponding to the copy you want to keep.
 43        Pressing 'enter' without entering a number will skip that set without deleting anything.""",
 44    )
 45
 46    parser.add_argument(
 47        "-ad",
 48        "--autodelete",
 49        action="store_true",
 50        help=""" Automatically decide which file to keep and which to delete from each set of duplicate files instead of asking which to keep. """,
 51    )
 52
 53    parser.add_argument(
 54        "-ns",
 55        "--no_show",
 56        action="store_true",
 57        help=""" Don't show printout of matching files. """,
 58    )
 59
 60    parser.add_argument(
 61        "path",
 62        type=str,
 63        default=Pathier.cwd(),
 64        nargs="?",
 65        help=""" The path to compare files in. """,
 66    )
 67
 68    args = parser.parse_args()
 69    if not args.path == Pathier.cwd():
 70        args.path = Pathier(args.path)
 71
 72    return args
 73
 74
 75def delete_wizard(matches: list[list[Pathier]]):
 76    """Ask which file to keep for each set."""
 77    print()
 78    print("Enter the corresponding number of the file to keep.")
 79    print(
 80        "Press 'Enter' without giving a number to skip deleting any files for the given set."
 81    )
 82    print()
 83    for match in matches:
 84        map_ = {str(i): file for i, file in enumerate(match, 1)}
 85        options = "\n".join(f"({i}) {file}" for i, file in map_.items()) + "\n"
 86        print(options)
 87        keeper = input(f"Enter number of file to keep ({', '.join(map_.keys())}): ")
 88        if keeper:
 89            [map_[num].delete() for num in map_ if num != keeper]
 90
 91
 92def autodelete(matches: list[list[Pathier]]):
 93    """Keep one of each set in `matches` and delete the others."""
 94    for match in matches:
 95        match.pop()
 96        [file.delete() for file in match]
 97
 98
 99def dupechecker(args: argparse.Namespace | None = None):
100    print()
101    if not args:
102        args = get_args()
103    s = [
104        ch.rjust(i + j)
105        for i in range(1, 25, 3)
106        for j, ch in enumerate(["/", "-", "\\"])
107    ]
108    s += s[::-1]
109    with Spinner(s) as spinner:
110        with ThreadPoolExecutor() as exc:
111            thread = exc.submit(get_duplicates, args.path, args.recursive)
112            while not thread.done():
113                spinner.display()
114                time.sleep(0.025)
115            matches = thread.result()
116    if matches:
117        print(f"Found {len(matches)} duplicate sets of files.")
118        if not args.no_show:
119            print(griddy(matches))
120        if args.delete_dupes or args.autodelete:
121            size = args.path.size()
122            delete_wizard(matches) if args.delete_dupes else autodelete(matches)
123            deleted_size = size - args.path.size()
124            print(f"Deleted {Pathier.format_size(deleted_size)}.")
125    else:
126        print("No duplicates detected.")
127
128
if __name__ == "__main__":
    # Parse the command line first, then hand the result to the checker.
    cli_args = get_args()
    dupechecker(cli_args)
def get_duplicates( path: pathier.pathier.Pathier, recursive: bool = False) -> list[list[pathier.pathier.Pathier]]:
def get_duplicates(path: Pathier, recursive: bool = False) -> list[list[Pathier]]:
    """Return a list of lists for duplicate files in `path`.

    Each sub-list will contain 2 or more files determined to be equivalent files
    (contents are compared with `filecmp.cmp(..., shallow=False)`).
    If `recursive` is `True`, files from `path` and its subdirectories will be compared.

    Only entries matching the glob `*.*` are considered, so names without a `.` are ignored.
    Entries that are not files (directories, dangling symlinks) are skipped."""
    candidates = path.rglob("*.*") if recursive else path.glob("*.*")
    # Drop non-files up front: a directory can never match anything,
    # and a dangling symlink would make `filecmp.cmp` raise when it stats the path.
    files = [file for file in candidates if file.is_file()]
    matching_sets = []
    while files:
        comparee = files.pop()
        matching_files = []
        remaining = []
        # Single pass: split what is left into copies of `comparee` and everything else,
        # rather than looking up and popping each match individually.
        for file in files:
            if filecmp.cmp(comparee, file, False):
                matching_files.append(file)
            else:
                remaining.append(file)
        if matching_files:
            matching_sets.append([comparee, *matching_files])
        files = remaining
    return matching_sets

Return a list of lists for duplicate files in path. Each sub-list will contain 2 or more files determined to be equivalent files. If recursive is True, files from path and its subdirectories will be compared.

def get_args() -> argparse.Namespace:
def get_args() -> argparse.Namespace:
    """Parse the command line and return the resulting namespace.

    Defines four on/off switches (`recursive`, `delete_dupes`, `autodelete`, `no_show`)
    and one optional positional `path` that defaults to `Pathier.cwd()`.
    A `path` given on the command line arrives as a string and is converted to a `Pathier`."""
    arg_parser = argparse.ArgumentParser()

    # Every option is a plain flag, so declare them as (short, long, help) rows.
    switches = (
        ("-r", "--recursive", " Glob files to compare recursively. "),
        (
            "-d",
            "--delete_dupes",
            " After finding duplicates, delete all but one copy.\n"
            "        For each set of duplicates, the tool will ask you to enter the number corresponding to the copy you want to keep.\n"
            "        Pressing 'enter' without entering a number will skip that set without deleting anything.",
        ),
        (
            "-ad",
            "--autodelete",
            " Automatically decide which file to keep and which to delete from each set of duplicate files instead of asking which to keep. ",
        ),
        ("-ns", "--no_show", " Don't show printout of matching files. "),
    )
    for short_flag, long_flag, help_text in switches:
        arg_parser.add_argument(
            short_flag, long_flag, action="store_true", help=help_text
        )

    arg_parser.add_argument(
        "path",
        type=str,
        default=Pathier.cwd(),
        nargs="?",
        help=" The path to compare files in. ",
    )

    parsed = arg_parser.parse_args()
    # The untouched default is already a `Pathier`; anything else still needs wrapping.
    is_default = parsed.path == Pathier.cwd()
    if not is_default:
        parsed.path = Pathier(parsed.path)

    return parsed
def delete_wizard(matches: list[list[pathier.pathier.Pathier]]):
def delete_wizard(matches: list[list[Pathier]]):
    """Ask which file to keep for each set and delete the rest of that set.

    For every set in `matches` the files are listed with 1-based numbers
    and the user is prompted for the number of the file to keep.
    Pressing 'Enter' without giving a number skips the set without deleting anything.
    Any other answer that isn't one of the listed numbers is rejected and the prompt is repeated,
    so a typo can never delete every copy in a set."""
    print()
    print("Enter the corresponding number of the file to keep.")
    print(
        "Press 'Enter' without giving a number to skip deleting any files for the given set."
    )
    print()
    for match in matches:
        map_ = {str(i): file for i, file in enumerate(match, 1)}
        options = "\n".join(f"({i}) {file}" for i, file in map_.items()) + "\n"
        print(options)
        prompt = f"Enter number of file to keep ({', '.join(map_)}): "
        keeper = input(prompt).strip()
        # Re-prompt until the answer is blank (skip) or names a listed file.
        while keeper and keeper not in map_:
            print(f"'{keeper}' is not one of the listed numbers.")
            keeper = input(prompt).strip()
        if not keeper:
            continue
        for num, file in map_.items():
            if num != keeper:
                file.delete()

Ask which file to keep for each set.

def autodelete(matches: list[list[pathier.pathier.Pathier]]):
def autodelete(matches: list[list[Pathier]]):
    """Keep one of each set in `matches` and delete the others.

    The last file of each set is the one that is kept.
    `matches` and its sub-lists are left unmodified."""
    for match in matches:
        # Slice instead of `pop()` so the caller's lists aren't mutated;
        # an empty set simply has nothing to delete.
        for file in match[:-1]:
            file.delete()

Keep one of each set in matches and delete the others.

def dupechecker(args: argparse.Namespace | None = None):
def dupechecker(args: argparse.Namespace | None = None):
    """Search `args.path` for duplicate files, report them and optionally delete extra copies.

    When `args` isn't given, the arguments are obtained from `get_args()`.
    The search runs in a worker thread while a spinner is displayed.
    If `args.delete_dupes` or `args.autodelete` is set, the corresponding deletion routine
    is run and the change in `args.path.size()` is printed."""
    print()
    args = args or get_args()

    # Spinner frames: the characters slide to the right, then the sequence is mirrored.
    frames = []
    for width in range(1, 25, 3):
        for offset, ch in enumerate(["/", "-", "\\"]):
            frames.append(ch.rjust(width + offset))
    frames.extend(frames[::-1])

    with Spinner(frames) as spinner, ThreadPoolExecutor() as pool:
        search = pool.submit(get_duplicates, args.path, args.recursive)
        # Keep redrawing the spinner until the worker has finished.
        while not search.done():
            spinner.display()
            time.sleep(0.025)
        matches = search.result()

    if not matches:
        print("No duplicates detected.")
        return

    print(f"Found {len(matches)} duplicate sets of files.")
    if not args.no_show:
        print(griddy(matches))
    if not (args.delete_dupes or args.autodelete):
        return

    size_before = args.path.size()
    # The interactive wizard takes precedence when both deletion flags are set.
    if args.delete_dupes:
        delete_wizard(matches)
    else:
        autodelete(matches)
    deleted_size = size_before - args.path.size()
    print(f"Deleted {Pathier.format_size(deleted_size)}.")