dupechecker.dupechecker

  1import argparse
  2import filecmp
  3import time
  4from concurrent.futures import ThreadPoolExecutor
  5from itertools import combinations
  6
  7from griddle import griddy
  8from pathier import Pathier
  9from printbuddies import Spinner
 10from younotyou import younotyou
 11
 12
 13def get_duplicates(paths: list[Pathier]) -> list[list[Pathier]]:
 14    """Return a list of lists for duplicate files in `paths`."""
 15    matching_sets = []
 16    while len(paths) > 0:
 17        comparee = paths.pop()
 18        matching_files = [file for file in paths if filecmp.cmp(comparee, file, False)]
 19        if matching_files:
 20            [paths.pop(paths.index(file)) for file in matching_files]
 21            matching_files.insert(0, comparee)
 22            matching_sets.append(matching_files)
 23    return matching_sets
 24
 25
 26def get_args() -> argparse.Namespace:
 27    parser = argparse.ArgumentParser()
 28
 29    parser.add_argument(
 30        "-r",
 31        "--recursive",
 32        action="store_true",
 33        help=""" Glob files to compare recursively. """,
 34    )
 35
 36    parser.add_argument(
 37        "-i",
 38        "--ignores",
 39        type=str,
 40        nargs="*",
 41        default=[],
 42        help=""" Ignore files matching these patterns.
 43        e.g. `dupechecker -i *.wav` will compare all files in the current working directory except .wav files.""",
 44    )
 45
 46    parser.add_argument(
 47        "-d",
 48        "--delete_dupes",
 49        action="store_true",
 50        help=""" After finding duplicates, delete all but one copy.
 51        For each set of duplicates, the tool will ask you to enter the number corresponding to the copy you want to keep.
 52        Pressing 'enter' without entering a number will skip that set without deleting anything.""",
 53    )
 54
 55    parser.add_argument(
 56        "-ad",
 57        "--autodelete",
 58        action="store_true",
 59        help=""" Automatically decide which file to keep and which to delete from each set of duplicate files instead of asking which to keep. """,
 60    )
 61
 62    parser.add_argument(
 63        "-ns",
 64        "--no_show",
 65        action="store_true",
 66        help=""" Don't show printout of matching files. """,
 67    )
 68
 69    parser.add_argument(
 70        "paths",
 71        type=str,
 72        default=[Pathier.cwd()],
 73        nargs="*",
 74        help=""" The paths to compare files in. """,
 75    )
 76
 77    args = parser.parse_args()
 78    if not args.paths == [Pathier.cwd()]:
 79        args.paths = [Pathier(path) for path in args.paths]
 80    files = []
 81    print("Gathering files...")
 82    for path in args.paths:
 83        files.extend(
 84            list(path.rglob("*.*")) if args.recursive else list(path.glob("*.*"))
 85        )
 86    args.paths = younotyou([str(file) for file in files], exclude_patterns=args.ignores)
 87    num_comparisons = len(list(combinations(args.paths, 2)))
 88    print(f"Making {num_comparisons} comparisons between {len(args.paths)} files...")
 89
 90    return args
 91
 92
 93def delete_wizard(matches: list[list[Pathier]]):
 94    """Ask which file to keep for each set."""
 95    print()
 96    print("Enter the corresponding number of the file to keep.")
 97    print(
 98        "Press 'Enter' without giving a number to skip deleting any files for the given set."
 99    )
100    print()
101    for match in matches:
102        map_ = {str(i): file for i, file in enumerate(match, 1)}
103        options = "\n".join(f"({i}) {file}" for i, file in map_.items()) + "\n"
104        print(options)
105        keeper = input(f"Enter number of file to keep ({', '.join(map_.keys())}): ")
106        if keeper:
107            [map_[num].delete() for num in map_ if num != keeper]
108
109
def autodelete(matches: list[list[Pathier]]):
    """Keep one of each set in `matches` and delete the others.

    The last file of every set is the one that is kept. The lists in
    `matches` are not modified, and an empty set is simply skipped.
    """
    for match in matches:
        # Slice instead of popping so the caller's list keeps its keeper.
        for file in match[:-1]:
            file.delete()
115
116
def dupechecker(args: argparse.Namespace | None = None):
    """Find duplicate files and optionally delete the extra copies.

    `args` is the namespace produced by `get_args`; when omitted the
    command line is parsed here. The comparison runs in a worker thread
    so a spinner can be animated while it is in progress.

    When duplicates are found they are printed (unless `no_show` is
    set). With `delete_dupes` the user picks which copy to keep; with
    only `autodelete` the choice is made automatically. The amount of
    disk space freed is printed afterwards.
    """
    print()
    if args is None:
        args = get_args()
    # Spinner frames: the three characters drift right, then back again.
    s = [
        ch.rjust(i + j)
        for i in range(1, 25, 3)
        for j, ch in enumerate(["/", "-", "\\"])
    ]
    s += s[::-1]
    with Spinner(s) as spinner:
        with ThreadPoolExecutor() as exc:
            thread = exc.submit(get_duplicates, args.paths)
            while not thread.done():
                spinner.display()
                time.sleep(0.025)
            matches = thread.result()
    if not matches:
        print("No duplicates detected.")
        return
    print(f"Found {len(matches)} duplicate sets of files.")
    if not args.no_show:
        print(griddy(matches))
    if args.delete_dupes or args.autodelete:
        # The delete helpers call `.delete()` on each entry, so make sure
        # every entry is a `Pathier` even if plain strings were supplied.
        matches = [[Pathier(file) for file in match] for match in matches]
        # Snapshot the candidates up front; the size difference of these
        # files before and after deletion is the space that was freed.
        candidates = [file for match in matches for file in match]

        def total_size() -> int:
            return sum(file.stat().st_size for file in candidates if file.exists())

        size = total_size()
        if args.delete_dupes:
            delete_wizard(matches)
        else:
            autodelete(matches)
        deleted_size = size - total_size()
        print(f"Deleted {Pathier.format_size(deleted_size)}.")
145
146
# Script entry point: parse the command line, then run the duplicate check.
if __name__ == "__main__":
    dupechecker(get_args())
def get_duplicates( paths: list[pathier.pathier.Pathier]) -> list[list[pathier.pathier.Pathier]]:
def get_duplicates(paths: list[Pathier]) -> list[list[Pathier]]:
    """Return a list of lists for duplicate files in `paths`.

    Files are compared by content (`filecmp.cmp` with `shallow=False`).
    Each inner list holds two or more files with identical content;
    files without any duplicate do not appear in the result.

    The search walks `paths` from the end, so the first entry of every
    set is the member that sits last in `paths`; the remaining members
    keep their relative order.

    `paths` itself is left untouched.
    """
    # Work on a copy so the caller's list is not emptied as a side effect.
    remaining = list(paths)
    matching_sets = []
    while remaining:
        comparee = remaining.pop()
        matching_files = []
        unmatched = []
        for file in remaining:
            if filecmp.cmp(comparee, file, False):
                matching_files.append(file)
            else:
                unmatched.append(file)
        if matching_files:
            matching_sets.append([comparee, *matching_files])
        # Files that matched are already grouped; only the rest need another pass.
        remaining = unmatched
    return matching_sets

Return a list of lists for duplicate files in paths.

def get_args() -> argparse.Namespace:
def get_args() -> argparse.Namespace:
    """Parse the command line and gather the files to compare.

    On return `args.paths` no longer holds the directories given on the
    command line: it is replaced by the files globbed from them, minus
    anything matching the `--ignores` patterns.

    Prints the number of gathered files and the number of pairwise
    comparisons that implies.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help=""" Glob files to compare recursively. """,
    )

    parser.add_argument(
        "-i",
        "--ignores",
        type=str,
        nargs="*",
        default=[],
        help=""" Ignore files matching these patterns.
        e.g. `dupechecker -i *.wav` will compare all files in the current working directory except .wav files.""",
    )

    parser.add_argument(
        "-d",
        "--delete_dupes",
        action="store_true",
        help=""" After finding duplicates, delete all but one copy.
        For each set of duplicates, the tool will ask you to enter the number corresponding to the copy you want to keep.
        Pressing 'enter' without entering a number will skip that set without deleting anything.""",
    )

    parser.add_argument(
        "-ad",
        "--autodelete",
        action="store_true",
        help=""" Automatically decide which file to keep and which to delete from each set of duplicate files instead of asking which to keep. """,
    )

    parser.add_argument(
        "-ns",
        "--no_show",
        action="store_true",
        help=""" Don't show printout of matching files. """,
    )

    parser.add_argument(
        "paths",
        type=str,
        default=[Pathier.cwd()],
        nargs="*",
        help=""" The paths to compare files in. """,
    )

    args = parser.parse_args()
    # Values typed on the command line arrive as `str`; wrapping the
    # default (already a `Pathier`) again is harmless.
    search_dirs = [Pathier(path) for path in args.paths]
    print("Gathering files...")
    files = []
    for path in search_dirs:
        # NOTE: "*.*" only matches names containing a dot, so files
        # without an extension are never compared.
        files.extend(path.rglob("*.*") if args.recursive else path.glob("*.*"))
    kept = younotyou([str(file) for file in files], exclude_patterns=args.ignores)
    # The filter is fed strings; convert back so later steps can call
    # `Pathier` methods such as `.delete()` on every entry.
    args.paths = [Pathier(file) for file in kept]
    num_files = len(args.paths)
    # n choose 2, computed directly instead of materialising every pair.
    num_comparisons = num_files * (num_files - 1) // 2
    print(f"Making {num_comparisons} comparisons between {num_files} files...")

    return args
def delete_wizard(matches: list[list[pathier.pathier.Pathier]]):
 94def delete_wizard(matches: list[list[Pathier]]):
 95    """Ask which file to keep for each set."""
 96    print()
 97    print("Enter the corresponding number of the file to keep.")
 98    print(
 99        "Press 'Enter' without giving a number to skip deleting any files for the given set."
100    )
101    print()
102    for match in matches:
103        map_ = {str(i): file for i, file in enumerate(match, 1)}
104        options = "\n".join(f"({i}) {file}" for i, file in map_.items()) + "\n"
105        print(options)
106        keeper = input(f"Enter number of file to keep ({', '.join(map_.keys())}): ")
107        if keeper:
108            [map_[num].delete() for num in map_ if num != keeper]

Ask which file to keep for each set.

def autodelete(matches: list[list[pathier.pathier.Pathier]]):
def autodelete(matches: list[list[Pathier]]):
    """Keep one of each set in `matches` and delete the others.

    The last file of every set is the one that is kept. The lists in
    `matches` are not modified, and an empty set is simply skipped.
    """
    for match in matches:
        # Slice instead of popping so the caller's list keeps its keeper.
        for file in match[:-1]:
            file.delete()

Keep one of each set in matches and delete the others.

def dupechecker(args: argparse.Namespace | None = None):
def dupechecker(args: argparse.Namespace | None = None):
    """Find duplicate files and optionally delete the extra copies.

    `args` is the namespace produced by `get_args`; when omitted the
    command line is parsed here. The comparison runs in a worker thread
    so a spinner can be animated while it is in progress.

    When duplicates are found they are printed (unless `no_show` is
    set). With `delete_dupes` the user picks which copy to keep; with
    only `autodelete` the choice is made automatically. The amount of
    disk space freed is printed afterwards.
    """
    print()
    if args is None:
        args = get_args()
    # Spinner frames: the three characters drift right, then back again.
    s = [
        ch.rjust(i + j)
        for i in range(1, 25, 3)
        for j, ch in enumerate(["/", "-", "\\"])
    ]
    s += s[::-1]
    with Spinner(s) as spinner:
        with ThreadPoolExecutor() as exc:
            thread = exc.submit(get_duplicates, args.paths)
            while not thread.done():
                spinner.display()
                time.sleep(0.025)
            matches = thread.result()
    if not matches:
        print("No duplicates detected.")
        return
    print(f"Found {len(matches)} duplicate sets of files.")
    if not args.no_show:
        print(griddy(matches))
    if args.delete_dupes or args.autodelete:
        # The delete helpers call `.delete()` on each entry, so make sure
        # every entry is a `Pathier` even if plain strings were supplied.
        matches = [[Pathier(file) for file in match] for match in matches]
        # Snapshot the candidates up front; the size difference of these
        # files before and after deletion is the space that was freed.
        candidates = [file for match in matches for file in match]

        def total_size() -> int:
            return sum(file.stat().st_size for file in candidates if file.exists())

        size = total_size()
        if args.delete_dupes:
            delete_wizard(matches)
        else:
            autodelete(matches)
        deleted_size = size - total_size()
        print(f"Deleted {Pathier.format_size(deleted_size)}.")