-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimage_scrape.py
63 lines (45 loc) · 1.66 KB
/
image_scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#This is automation used to download images from google for training our CNN model.
from selenium import webdriver
import os
import urllib.request
import time
path = r'C:\Program Files (x86)\chromedriver.exe'
url_prefix = "https://www.google.com.sg/search?q="
url_postfix = "&source=lnms&tbm=isch&sa=X&ei=0eZEVbj3IJG5uATalICQAQ&ved=0CAcQ_AUoAQ&biw=939&bih=591"
save_folder = 'train'
def main():
if not os.path.exists(save_folder):
os.mkdir(save_folder)
download_images()
def download_images():
#no of images you want
n=100;
#keyword for what you are searching
search_for='null'
search_url = url_prefix+search_for+url_postfix
path = r'C:\Program Files (x86)\chromedriver.exe'
driver = webdriver.Chrome(path)
driver.get(search_url)
value = 0
for i in range(3):
driver.execute_script("scrollBy("+ str(value) +",+1000);")
value += 1000
time.sleep(1)
elem1 = driver.find_element_by_id('islmp')
sub = elem1.find_elements_by_tag_name('img')
count=0
for j,i in enumerate(sub):
if j < n:
src = i.get_attribute('src')
try:
if src != None:
src = str(src)
urllib.request.urlretrieve(src, os.path.join(save_folder, search_for+str(count)+'.jpg'))
else:
raise TypeError
except Exception as e: #catches type error along with other errors
print(f'fail with error {e}')
count+=1
driver.close()
if __name__ == "__main__":
main()