Skip to content

Commit 02d7a60

Browse files
authored
Merge pull request #32 from Ashutosh-agarwal2004/feature/duplicate-file-detector
Feature/duplicate file detector issue: #8
2 parents 2c43f20 + 3d362c5 commit 02d7a60

File tree

3 files changed

+121
-0
lines changed

3 files changed

+121
-0
lines changed
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# 🧮 Duplicate File Detector
2+
3+
A simple Python tool to **find and manage duplicate files** in any folder — by **name**, **size**, or **content hash**.
4+
5+
---
6+
7+
## 🚀 Features
8+
- Detect duplicates by:
9+
- 🔠 **File name**
10+
- 📏 **File size**
11+
- 🔐 **File hash (SHA-256)**
12+
- Option to:
13+
- 🧾 **List duplicates only**
14+
- 🗑 **Safely delete duplicates** (keeps one copy)
15+
16+
---
17+
18+
## ⚙️ How to Run
19+
1. Clone the repository:
20+
```bash
21+
git clone https://github.com/<your-username>/<repo>.git
22+
cd <repo>/Python/duplicate_file_detector
23+
```
24+
25+
2. Run the script:
26+
```bash
27+
python find_duplicates.py
28+
```
29+
30+
3. Follow on-screen instructions:
31+
- Choose scan mode (by name / size / hash)
32+
- Decide whether to delete duplicates
33+
34+
---
35+
36+
## 📦 Requirements
37+
No external libraries needed.
38+
39+
## 🖼️ Example Output
40+
![App Screenshot](Python/duplicate_file_detector/Screenshots "Screenshot1")
41+
42+
43+
## 👨‍💻 Author
44+
[Ashutosh Agarwal](https://github.com/Ashutosh-agarwal2004)
45+
18.4 KB
Loading
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import os
2+
import hashlib
3+
from collections import defaultdict
4+
5+
def get_file_hash(path, chunk_size=8192):
    """Return the SHA-256 hex digest of the file at *path*.

    The file is read in blocks of ``chunk_size`` bytes so that large files
    are never loaded into memory in one piece.

    Args:
        path: Path of the file to hash.
        chunk_size: Number of bytes requested per read.

    Returns:
        The hexadecimal digest string, or ``None`` when the file cannot be
        opened or read.
    """
    digest = hashlib.sha256()
    try:
        with open(path, "rb") as stream:
            while chunk := stream.read(chunk_size):
                digest.update(chunk)
    except OSError:
        # OSError is the base class of PermissionError and FileNotFoundError
        # and also covers other I/O failures (the path is a directory, the
        # read fails midway, ...), so one unreadable entry cannot abort a
        # whole scan.
        return None
    return digest.hexdigest()
15+
16+
def scan_folder(folder_path, mode="hash"):
    """Walk *folder_path* recursively and group files that look like duplicates.

    Args:
        folder_path: Root directory handed to ``os.walk``.
        mode: Grouping criterion. ``"name"`` groups by file name,
            ``"size"`` by size in bytes, and ``"hash"`` by the value
            returned from ``get_file_hash``.

    Returns:
        A dict mapping each grouping key to the list of file paths sharing
        it. Only keys with more than one path are included.

    Raises:
        ValueError: If *mode* is not ``"name"``, ``"size"`` or ``"hash"``.
    """
    if mode not in ("name", "size", "hash"):
        # Validate once, before walking: otherwise an invalid mode is only
        # noticed when the first file is visited, and an empty or missing
        # folder would quietly return {}.
        raise ValueError("Mode must be 'name', 'size', or 'hash'.")

    groups = defaultdict(list)
    for root, _, files in os.walk(folder_path):
        for file in files:
            file_path = os.path.join(root, file)
            if mode == "name":
                key = file
            elif mode == "size":
                try:
                    key = os.path.getsize(file_path)
                except OSError:
                    # The size cannot be read; leave the file out.
                    continue
            else:
                key = get_file_hash(file_path)
                if key is None:
                    # The file could not be hashed; leave it out.
                    continue
            groups[key].append(file_path)

    # A key with a single path has no duplicate partner.
    return {key: paths for key, paths in groups.items() if len(paths) > 1}
37+
38+
def display_duplicates(duplicates):
    """Print each duplicate group, or a notice when there are none.

    Args:
        duplicates: Mapping of grouping key to a list of file paths. Only
            the path lists are printed; the keys are not shown.
    """
    if duplicates:
        print("\n🔍 Duplicate Files Found:\n")
        for paths in duplicates.values():
            print(f"Group ({len(paths)} files):")
            for path in paths:
                print(" ", path)
            # Separator line closing the group.
            print("-" * 60)
    else:
        print("✅ No duplicates found.")
48+
49+
def delete_duplicates(duplicates):
    """Delete every file in each duplicate group except the first one.

    Progress is printed for each kept, deleted, or undeletable file. A file
    that cannot be removed is reported and skipped so the remaining files
    are still processed.

    Args:
        duplicates: Mapping of grouping key to a list of file paths. The
            first path of each list is kept; the rest are removed.
    """
    for files in duplicates.values():
        if not files:
            # Nothing to keep or delete; avoids an IndexError on an empty group.
            continue
        keep, *extras = files
        print(f"\nKeeping: {keep}")
        for path in extras:
            try:
                os.remove(path)
            except OSError as e:
                # os.remove reports filesystem failures (missing file,
                # permissions, path is a directory) as OSError.
                print(f"⚠️ Could not delete {path}: {e}")
            else:
                print(f"🗑 Deleted duplicate: {path}")
59+
60+
if __name__ == "__main__":
    # Interactive entry point: ask for a folder and a comparison mode, list
    # the duplicate groups, then optionally delete all but one file per group.
    folder = input("Enter folder path to scan: ").strip()
    if not os.path.isdir(folder):
        # os.walk() yields nothing for a missing path, so without this check
        # a mistyped folder would be reported as "No duplicates found".
        raise SystemExit(f"❌ Not a folder: {folder}")

    print("\nSelect comparison mode:")
    print("1. By name\n2. By size\n3. By hash (recommended)")
    mode_choice = input("Enter choice (1/2/3): ").strip()
    # Any unrecognised choice falls back to the content-hash comparison.
    mode = {"1": "name", "2": "size", "3": "hash"}.get(mode_choice, "hash")

    duplicates = scan_folder(folder, mode)
    display_duplicates(duplicates)

    if duplicates:
        if mode != "hash":
            # The same name or size does not imply the same content, so warn
            # before offering to delete files that may not be true copies.
            print(f"\n⚠️ Files were matched by {mode} only and may differ in content.")
        action = input("\nDo you want to delete duplicates? (y/n): ").strip().lower()
        if action == "y":
            delete_duplicates(duplicates)
            print("\n✅ Deletion complete.")
        else:
            print("\nNo files were deleted.")

0 commit comments

Comments
 (0)