-
Notifications
You must be signed in to change notification settings - Fork 0
/
pexel.py
45 lines (36 loc) · 1.09 KB
/
pexel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import re
import requests
from bs4 import BeautifulSoup
import os
from multiprocessing import Pool
# Module-level accumulator for scraped image URLs.
# NOTE(review): with multiprocessing.Pool each worker process gets its own
# copy of this list — it is per-process state, not shared across workers.
photos = []
# Browser-like request headers so pexels.com serves the normal HTML page.
headers = {
'Accept': '*/*',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:59.0) Gecko/20100101 Firefox/59.0'
}
def get_data(url):
    """Fetch one Pexels listing page, collect its photo URLs, then download them.

    Parses ``article.photo-item > a > img`` tags, appends each ``src``
    attribute to the module-level ``photos`` list, and finally calls
    ``wget()`` to download everything collected so far.
    """
    # BUG FIX: the original bound the response to a local named `re`,
    # shadowing the imported `re` module — renamed to `response`.
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'lxml')
    imgs = soup.select('article.photo-item > a > img')
    for img in imgs:
        photo = img.get('src')
        photos.append(photo)
    wget()
def wget():
    """Download every URL in the module-level ``photos`` list into ./picture.

    The filename is extracted from the URL path (the segment between the
    numeric id and the ``?auto`` query string); URLs that don't match the
    pattern are skipped. ``photos`` is emptied when done.
    """
    global photos
    path = os.path.abspath('.') + '/picture'
    if os.path.isdir(path) is False:
        os.mkdir(path)
    # FIX: raw string — the original '\/\d+\/(.*)\?auto' used the invalid
    # escape `\/`, which is a SyntaxWarning (an error in future Python).
    name_pattern = re.compile(r'/\d+/(.*)\?auto')
    for photo in photos:
        photo_name = name_pattern.findall(photo)
        if photo_name:
            dest = os.path.join(path, photo_name[0])
            res = requests.get(photo, headers=headers)
            # FIX: context manager so the file handle is closed even if
            # the write raises.
            with open(dest, 'wb') as f:
                f.write(res.content)
    photos = []
if __name__ == '__main__':
    # Listing pages 1..19 of the Pexels front page.
    urls = ["https://www.pexels.com/?page={}".format(str(i)) for i in range(1, 20)]
    # FIX: the original never closed/joined the pool, so the parent could
    # exit before workers finished. The context manager terminates workers
    # on exit; map() itself blocks until all results are in.
    with Pool(12) as pool:
        pool.map(get_data, urls)