-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgettyimages.py
executable file
·86 lines (56 loc) · 2.79 KB
/
gettyimages.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
import logging
from download import media_download
import re
#GET IMAGE ID TO NAME FILES
def image_link_cleaner(image_link, **kwargs):
    """Return the last path segment of ``image_link`` for use as a file name.

    The link is split on ``/`` and only the final segment is kept; every
    literal ``?adppopup=true`` occurring in that segment is removed. Any
    other query string is left untouched.

    Args:
        image_link: The ``/``-separated link string to clean.
        **kwargs: Accepted for call compatibility and ignored.

    Returns:
        The cleaned final segment (an empty string if the link ends in ``/``).
    """
    # Plain string methods are used instead of ``re``: the text to remove is
    # a fixed literal, and the former non-raw pattern "\?adppopup=true"
    # relied on an invalid escape sequence.
    link_id = image_link.rsplit("/", 1)[-1]
    return link_id.replace("?adppopup=true", "")
#IMAGE SCRAPER
def image_scraper(path, driver, directory, pages, media_number, kwargs):
    """Download up to ``media_number`` images across up to ``pages`` result pages.

    On every page the browser is scrolled to the bottom, all ``<img>``
    elements whose parent is a ``<picture>`` are collected, and each image's
    ``src`` is passed to ``media_download`` with a ``.png`` extension and the
    name produced by ``image_link_cleaner``. After a page is processed the
    "next" pagination button is clicked. Scraping stops as soon as
    ``media_number`` images have been downloaded.

    Args:
        path: Forwarded unchanged to ``media_download``.
        driver: Selenium WebDriver already showing the first results page.
        directory: Forwarded unchanged to ``media_download``.
        pages: Maximum number of result pages to visit.
        media_number: Maximum total number of images to download.
        kwargs: Not used by this function; kept so existing callers that
            pass it positionally keep working.

    Returns:
        None. Failures for individual images and for pagination are logged
        and do not abort the run.
    """
    # NOTE (original author): only for image size 612x612.
    # k is the 1-based index of the next image to download.
    k = 1
    for i in range(pages):
        # Scroll to the end of the page so lazily loaded images are requested.
        driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
        time.sleep(2)  # Wait for all the images to load correctly.
        images = driver.find_elements(By.XPATH, "//img[parent::picture]")
        print(f"\nDownloading {i+1} out of {pages} pages\n")

        for image in images:
            if k > media_number:
                break
            try:
                image_url = image.get_attribute('src')
                img_id = image_link_cleaner(image_url)
                media_download(path, image_url, directory, '.png', img_id)
                print(f"[{k}/{media_number}] Downloaded image {img_id}")
                k += 1
                time.sleep(1)
            except Exception as e:
                # Best effort: a failed image is logged and the next one is tried.
                logging.error(
                    "An error occurred in downloading the %sth image: %s", k, e
                )

        if k > media_number:
            # Quota reached: no reason to keep scrolling and paginating.
            return

        _go_to_next_page(driver)
        # Take a break after moving to the next page.
        time.sleep(2)


def _go_to_next_page(driver):
    """Scroll the "next" pagination button into view and click it (best effort)."""
    try:
        nextpage = driver.find_element(
            By.XPATH, "//button[contains(@data-testid, 'pagination-button-next')]"
        )
        # element.scrollTo() is a bit buggy, so the target offset is computed
        # by hand. window.scrollTo takes (x, y): the vertical target must be
        # the SECOND argument.
        driver.execute_script(
            """
            const elem = arguments[0];
            const y_offset = -500;
            const target = elem.getBoundingClientRect().top + window.pageYOffset + y_offset;
            console.log(target);
            window.scrollTo(0, target);
            """,
            nextpage,
        )
        # Sometimes a popup that blocks the button appears. find_elements
        # returns an empty list when it is absent, whereas find_element would
        # raise and the "next" click below would be skipped.
        pop_ups = driver.find_elements(
            By.CLASS_NAME, "global-notification-banner__close-icon"
        )
        if pop_ups:
            pop_ups[0].click()
        # Short pause so the scroll / banner dismissal settles before clicking.
        time.sleep(1)
        nextpage.click()
    except Exception as e:
        logging.error("An error occurred in scrolling: %s", e)