-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdeduplicator.py
42 lines (32 loc) · 933 Bytes
/
deduplicator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import argparse
import os
import sys
# Command-line interface: two required positional arguments, the folder to
# scan (SOURCE) and the folder that receives the unique files (TARGET).
argparser = argparse.ArgumentParser(description="Mass deduplicator")
argparser.add_argument("source", metavar="SOURCE", type=str, help="Source folder")
argparser.add_argument("target", metavar="TARGET", type=str, help="Output folder")

# Parsed at module level; main() reads the result through this global name.
args = argparser.parse_args()
def main():
    """Copy each file with unique, non-empty content from source to target.

    The folders are read from the module-level ``args`` namespace
    (``args.source`` and ``args.target``). Every regular file directly inside
    the source folder is read as text; empty files and files whose content
    has already been seen are skipped, and the remaining files are written to
    the target folder under their original names.

    Raises:
        SystemExit: with exit code 1 (after printing a message) when the
            source folder does not exist or is not a directory.
    """
    # isdir rather than exists: a regular file passed as SOURCE would
    # otherwise slip through and make os.listdir fail further down.
    if not os.path.isdir(args.source):
        print("Folder not found!")
        sys.exit(1)

    # makedirs also creates missing parent folders, and exist_ok removes the
    # check-then-create race of a separate os.path.exists test.
    os.makedirs(args.target, exist_ok=True)

    # A set gives O(1) membership tests for the contents seen so far.
    seen_contents = set()

    # os.listdir returns names in arbitrary order; sorting makes the choice of
    # which duplicate survives deterministic (the first name in sort order).
    for name in sorted(os.listdir(args.source)):
        source_path = os.path.join(args.source, name)

        # Skip sub-directories and other non-regular entries (this includes
        # the target folder when it lives inside the source folder).
        if not os.path.isfile(source_path):
            continue

        with open(source_path, "r") as source_file:
            content = source_file.read()

        # Ignore empty files and anything already copied.
        if not content or content in seen_contents:
            continue

        seen_contents.add(content)
        with open(os.path.join(args.target, name), "w") as target_file:
            target_file.write(content)
# Run the deduplicator only when this file is executed as a script.
if __name__ == "__main__":
    main()