-
Notifications
You must be signed in to change notification settings - Fork 49
/
tikstar.py
29 lines (25 loc) · 968 Bytes
/
tikstar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
"""
@Description: Parses content from the www.tikstar.com website to extract tags
@Date :2021/12/22
@Author :xhunmon
@Mail :xhunmon@gmail.com
"""
from bs4 import BeautifulSoup
import file_util as futil
def _strip_ws(text):
    """Return *text* with every newline and space character removed."""
    return text.replace('\n', '').replace(' ', '')


def parse_tags(page):
    """Extract per-tag statistics from the rows of an HTML table.

    The first ``<tbody>`` in the page is scanned row by row. In each row the
    first cell's ``<h3>`` supplies the tag name, the second cell the video
    count and the third cell the view count. Newlines and spaces are removed
    from every value before formatting.

    Args:
        page: HTML markup handed to ``BeautifulSoup`` with the
            ``'html.parser'`` backend.

    Returns:
        A list of strings, one per usable row, each formatted as
        ``'标签:<name> 视频数:<videos> 观看数:<views>'``. The list is empty
        when the page contains no ``<tbody>``.
    """
    soup = BeautifulSoup(page, 'html.parser')
    tbody = soup.find('tbody')
    if tbody is None:
        # find() yields None when nothing matches; report "no tags" rather
        # than failing with AttributeError on the chained call.
        return []

    result = []
    for tr in tbody.find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) < 3:
            # Header/spacer rows do not carry the three expected cells.
            continue
        heading = tds[0].find('h3')
        if heading is None:
            # Without an <h3> there is no tag name to report for this row.
            continue
        tag_name = _strip_ws(heading.text)
        video_num = _strip_ws(tds[1].text)
        views = _strip_ws(tds[2].text)
        result.append('标签:{} 视频数:{} 观看数:{}'.format(tag_name, video_num, views))
    return result
if __name__ == '__main__':
    # Script entry point: read the saved page 'tags.html' through futil,
    # turn it into formatted tag lines, echo them to stdout, and pass them
    # to futil.write_json together with the target name 'shoes.json'.
    page_source = futil.read('tags.html')
    tag_lines = parse_tags(page_source)
    print(tag_lines)
    futil.write_json(tag_lines, 'shoes.json')