-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
82 lines (68 loc) · 2.67 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'YI'
import requests
from bs4 import BeautifulSoup
import os
class MeiZe:
def __init__(self, url, page):
self.url = url + str(page)
self.headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/60.0.3112.113 Safari/537.36',
'Referer': 'http://www.mzitu.com/'
}
self.req = requests.session()
self.all_a = []
self.all_a_title = []
self.all_a_max = []
os.makedirs(os.path.join(os.getcwd(), 'mzitu'))
os.chdir(os.path.join(os.getcwd(), 'mzitu'))
self.initpwd = os.getcwd()
# 解析网页
def domainHtml(self):
html = self.req.get(self.url, headers=self.headers)
lis = BeautifulSoup(html.text, 'lxml').find('div', class_='postlist').find_all('li')
for a in lis:
imgurl = a.find('a')['href']
self.all_a.append(imgurl)
# 获取页面信息
def getMaxPage(self):
for a in self.all_a:
imghtml = self.req.get(a, headers=self.headers)
title = BeautifulSoup(imghtml.text, 'lxml').find('h2', class_='main-title').string
# 获取标题
last = BeautifulSoup(imghtml.text, 'lxml').find('div', class_='pagenavi').find_all('span')
last = int(last[-2].string)
self.all_a_title.append(title)
self.all_a_max.append(last)
# 下载妹子
def downloading(self):
cnt = 0
print('当前页妹子总数: %s' % len(self.all_a))
for a in self.all_a:
print('正在下载第 %s 个妹子...' % (cnt + 1))
os.makedirs(os.path.join(os.getcwd(), self.all_a_title[cnt]))
os.chdir(os.path.join(os.getcwd(), self.all_a_title[cnt]))
for i in range(1, self.all_a_max[cnt] + 1):
nurl = a + '/' + str(i)
imghtml = self.req.get(nurl, headers=self.headers)
optimize = BeautifulSoup(imghtml.text, 'lxml').find('div', class_='main-image').find('img')['src']
img = self.req.get(optimize, headers=self.headers)
f = open(str(i) + '.jpg', 'ab')
f.write(img.content)
f.close()
cnt += 1
os.chdir(self.initpwd)
print('本页下载完成,进入下一页下载!')
if __name__ == '__main__':
# 当前页
current = 1
# 总页数
total = 100
while current < total:
mz = MeiZe("http://www.mzitu.com/page/", current)
mz.domainHtml()
mz.getMaxPage()
mz.downloading()
current += 1