forked from hardikvasa/google-images-download
-
Notifications
You must be signed in to change notification settings - Fork 88
/
clean_images.py
31 lines (26 loc) · 885 Bytes
/
clean_images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import glob
import os
import numpy as np
from skimage import io, transform # conda install -c conda-forge scikit-image
from tqdm import tqdm
# Clean the "images/" tree in place: delete unsupported formats, delete files
# that cannot be decoded, and shrink any image whose height or width exceeds
# max_wh so that its longest side becomes max_wh.
max_wh = 2000  # max image size (pixels) allowed for height or width

# Keep regular files only: the "*.*" pattern also matches directories that have
# a dot in their name, and os.remove() on a directory would abort the scan.
files = [p for p in glob.iglob("images/**/*.*", recursive=True) if os.path.isfile(p)]
for f in tqdm(files, desc="Scanning images", total=len(files)):
    # Remove bad suffixes (lower-cased so "x.GIF" / "x.SVG" are caught too)
    suffix = f.split(".")[-1].lower()
    if suffix in ["gif", "svg"]:
        print(f"Removing {f}")
        os.remove(f)
        continue

    # Read Image
    try:
        img = io.imread(f)

        # Downsize to max_wh if necessary. Only the first two axes (height,
        # width) are spatial; a trailing channel axis must not affect the ratio.
        r = max_wh / max(img.shape[:2])  # ratio
        if r < 1:  # resize
            print(f"Resizing {f}")
            new_hw = (round(img.shape[0] * r), round(img.shape[1] * r))
            # preserve_range=True keeps the original pixel scale; by default
            # resize() returns floats rescaled to [0, 1], which a uint8 cast
            # would truncate to an (almost) all-black image.
            img = transform.resize(img, new_hw, preserve_range=True)
            # Round and clip before casting so interpolation overshoot cannot
            # wrap around; the output is always 8-bit, as in the original script.
            io.imsave(f, np.clip(np.rint(img), 0, 255).astype(np.uint8))
    except Exception:
        # Best-effort cleanup: anything that cannot be read (or re-saved) is
        # treated as a corrupted image and dropped from the dataset.
        print(f"Removing corrupted {f}")
        os.remove(f)