-
Notifications
You must be signed in to change notification settings - Fork 1
/
fdup.py
executable file
·76 lines (64 loc) · 2.05 KB
/
fdup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python
# fdup - A program to find duplicated files
# Copyright (C) 2010 Roland Kammerer
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import sys
import os
import hashlib
def inodesort(a, b):
    """Compare two paths by the inode number of the files they name.

    This is a cmp-style comparator: the result is negative when ``a`` has
    the smaller inode number, zero when both paths share an inode number,
    and positive otherwise.

    Raises OSError (propagated from os.stat) when either path cannot be
    stat'ed.
    """
    inode_a = os.stat(a).st_ino
    inode_b = os.stat(b).st_ino
    return int(inode_a - inode_b)
# Read sizes, in bytes.
K = 1024
M = K * K


def read_names(stream):
    """Return the file names listed one per line on *stream*.

    Only the line terminator is removed, so names that end in blanks or
    tabs survive intact. Empty lines are skipped: they name no file.
    """
    names = []
    for line in stream:
        name = line.rstrip('\r\n')
        if name:
            names.append(name)
    return names


def _report(err, fname):
    """Write an I/O failure for *fname* to stderr and carry on."""
    sys.stderr.write(
        "I/O error({0}): {1} ({2})\n".format(err.errno, err.strerror, fname))


def _inode(fname):
    """Return the inode number of *fname*, or 0 when it cannot be stat'ed.

    Unreadable paths must not abort the sort; they are reported later,
    when opening them fails.
    """
    try:
        return os.stat(fname).st_ino
    except OSError:
        return 0


def _group_by_prefix(fnames):
    """Group files by (size, first 1 KiB of content).

    This is the cheap first pass: files in different groups cannot be
    identical, so only groups with more than one member need hashing.
    Files that cannot be opened are reported on stderr and skipped.
    """
    groups = {}
    # Visit files in inode order (presumably to keep disk access roughly
    # sequential); a key function stats each file once.
    for fname in sorted(fnames, key=_inode):
        try:
            # Binary mode: compare raw bytes, no newline translation.
            with open(fname, 'rb') as fp:
                head = fp.read(1 * K)
                size = os.fstat(fp.fileno()).st_size
        except (IOError, OSError) as err:
            _report(err, fname)
            continue
        groups.setdefault((size, head), []).append(fname)
    return groups


def _digest(fname):
    """Return the MD5 digest of the whole content of *fname*.

    The file is read in 5 MiB chunks so large files are never held in
    memory at once. The digest is only used to compare content.
    """
    m = hashlib.md5()
    with open(fname, 'rb') as fp:
        while True:
            content = fp.read(5 * M)
            if not content:
                break
            m.update(content)
    return m.digest()


def find_duplicates(fnames):
    """Return groups of files with identical content.

    *fnames* is an iterable of paths. The result is a list of lists; each
    inner list holds two or more paths whose size, leading 1 KiB and MD5
    digest all match. Unreadable files are reported on stderr and left out.
    """
    duplicates = []
    for candidates in _group_by_prefix(fnames).values():
        if len(candidates) == 1:
            continue
        hashes = {}
        for fname in candidates:
            try:
                digest = _digest(fname)
            except (IOError, OSError) as err:
                # The file may have vanished since the first pass.
                _report(err, fname)
                continue
            hashes.setdefault(digest, []).append(fname)
        for group in hashes.values():
            if len(group) > 1:
                duplicates.append(group)
    return duplicates


def main():
    """Read file names from stdin and print each group of duplicates.

    Each group is printed one name per line and followed by an empty line.
    Exits with status 1 when called with arguments or when stdin lists no
    files.
    """
    # sys.stdout.write is used instead of print so the script runs under
    # both Python 2 and Python 3.
    if len(sys.argv) != 1:
        sys.stdout.write("{0} :\n".format(sys.argv[0]))
        sys.stdout.write("read files from stdin\n")
        sys.exit(1)

    fnames = read_names(sys.stdin)
    if not fnames:
        sys.exit(1)

    for group in find_duplicates(fnames):
        for fname in group:
            sys.stdout.write(fname + "\n")
        sys.stdout.write("\n")


if __name__ == "__main__":
    main()