Skip to content

Commit

Permalink
重构了前端和lofter专用的下载引擎
Browse files Browse the repository at this point in the history
  • Loading branch information
o5-null committed Feb 15, 2023
1 parent dfffd84 commit 9b3b039
Show file tree
Hide file tree
Showing 5 changed files with 478 additions and 28 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
__pycache__
*.json
*.html
*.obj
build
dist
__pycache__
30 changes: 12 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,20 @@
本软件是为了解决lofter批量下载问题而设计的<br>
使用纯python编写<br>
## 软件特性
1.使用readability解析引擎驱动,对于网站正文提取无需手动进行适配,支持几乎所有互联网网页<br>
2.使用remi实现webui,具有良好跨平台性<br>
1.反编译了lofter客户端,直接使用api获取内容,速度极快<br>
2.使用pywebio实现webui,具有良好跨平台性<br>
3.网页本地化保存支持良好,离线保存网页与原网页基本毫无区别<br>
4.使用aria实现图片高并发下载,速度极快,支持下载原图<br>
5.支持批量下载,可解析作者、tag、标签等<br>
6.可导入css样式表自定义webui样式<br>
4.支持批量下载,可解析作者、tag、标签等<br>
5.可导入css样式表自定义webui样式<br>
6.(未实现)下载完成可转换成电子书格式<br>
## 软件使用
1.下载软件<br>
2.安装aria<br>
3.根据实际情况启动服务器版或桌面版<br>
4.输入解析地址,按**解析链接**按钮开始解析
![i08lO.jpg](https://s1.328888.xyz/2022/04/15/i08lO.jpg)<br>
![i0Hhm.jpg](https://s1.328888.xyz/2022/04/15/i0Hhm.jpg)<br>
ps:软件解析过程中不会有加载进度条,请耐心等待,加载完成下载列表会显示<br>
## 软件使用情况展示
![i0DMS.jpg](https://s1.328888.xyz/2022/04/15/i0DMS.jpg)<br>
![i0qTA.jpg](https://s1.328888.xyz/2022/04/15/i0qTA.jpg)<br>
![i0lfR.jpg](https://s1.328888.xyz/2022/04/15/i0lfR.jpg)<br>
#### 下载文件展示
![i0AOq.jpg](https://s1.328888.xyz/2022/04/15/i0AOq.jpg)<br>
![i0hPP.jpg](https://s1.328888.xyz/2022/04/15/i0hPP.jpg)<br>
[![QQ-20230216010727.png](https://i.postimg.cc/PfYt87fb/QQ-20230216010727.png)](https://postimg.cc/jCqVpZ7C)
[![QQ-20230216010847.png](https://i.postimg.cc/Pq0jCWBF/QQ-20230216010847.png)](https://postimg.cc/kVNZZbMx)
[![QQ-20230216010902.png](https://i.postimg.cc/gcVW064N/QQ-20230216010902.png)](https://postimg.cc/PN5c3Jgw)
[![QQ-20230216010916.png](https://i.postimg.cc/0Q7LrLGb/QQ-20230216010916.png)](https://postimg.cc/qhMm5D5T)
[![QQ-20230216010941.png](https://i.postimg.cc/sDgNRBLB/QQ-20230216010941.png)](https://postimg.cc/BtRgTv4s)



### 当前版本为dev内测版,有问题请向开发者询问 qq1517808818
116 changes: 106 additions & 10 deletions core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,17 @@
from readability import Document
from bs4 import BeautifulSoup
from tqdm import tqdm
import wget
import unicodedata#标准编码库

from pathlib import Path
import re
import os

from datetime import datetime
import time
import json
import pickle#数据持久化


brower = requests.Session() #创建浏览器
Expand All @@ -19,6 +24,8 @@
}
api_url = 'http://url2api.applinzi.com/' # URL2Article API地址,使用体验版或购买独享资源

# Headers sent with every client-API request (see api_post); the User-Agent
# string presents the caller as the LOFTER Android 7.3.4 app.
api_headers = {'User-Agent': 'LOFTER-Android 7.3.4 (PRA-AL00X; Android 8.0.0; null) WIFI'}
# Cookies passed to `brower.post(..., cookies=api_cookies)`.
# NOTE(review): requests' `cookies=` expects a name -> value mapping, but here
# the whole raw Cookie header is stored under a single key named 'Cookie' --
# confirm the server really receives usertrack / NEWTOKEN / NTESwebSI as intended.
# NOTE(review): this looks like a personal session token committed to source --
# consider loading it from configuration instead.
api_cookies = {'Cookie': 'usertrack=dZPgEWPiVMNF6YfwJEIHAg==; NEWTOKEN=ZGUyN2NjOTE1YzE2ZmIwOTM0ZGU5MTIwYjJkZjBhNDJkMDI3YTliNGE4M2ZhMjkxYmY3ODZkN2VkNWRhZTBkNDE1Y2NkNDg4ZDUyMDAzZWNmNWUyMjgwNWY5NTQ2MGZm; NTESwebSI=DFD9B345542ECECF843D7DC7D99313F2.lofter-tomcat-docker-lftpro-3-avkys-cd6be-774f69457-rggg6-8080'}

#get
def gets(url):
Expand All @@ -30,6 +37,24 @@ def gets(url):
if response.status_code == 404:
print('网址不存在')
return 'null'
print(response.status_code)
print(url+'访问出错')
time.sleep(1)
gets(url)
except:
print(url+'访问崩溃,请检查网络')
time.sleep(1)
gets(url)

def api_post(url):
try:
#cj = {i.split("=")[0]:i.split("=")[1] for i in api_cookies.split(";")}
response = brower.post(url=url,headers=api_headers,cookies=api_cookies,timeout=5)
if response.status_code == 200:
return response
if response.status_code == 404:
print('网址不存在')
return 'null'
print(response)
print(url+'访问出错')
time.sleep(1)
Expand All @@ -41,10 +66,11 @@ def gets(url):

#清洗文件名
def clean_name(strs):
    """Return *strs* sanitised for use as a file or directory name.

    Steps, in order:
      1. NFKD-normalise, so full-width look-alikes (e.g. a full-width colon)
         collapse to their ASCII form and are caught by the filter below.
      2. Drop characters that are illegal or troublesome in file names:
         / \\ : * ? " < > | ^ @ &
      3. Drop invisible control / format characters (Unicode categories
         Cc and Cf, e.g. newlines and zero-width spaces).
      4. Replace spaces with underscores.

    Args:
        strs: the raw name (typically a post title or author name).

    Returns:
        The cleaned string; may be empty if every character was removed.
    """
    strs = unicodedata.normalize('NFKD', strs)
    # The previous pattern r'/<>?/\^|@& ' was not a character class, so it only
    # matched that literal sequence and stripped almost nothing.
    strs = re.sub(r'[/\\:*?"<>|^@&]', '', strs)
    # Remove invisible characters (control and format code points).
    strs = ''.join(
        ch for ch in strs if unicodedata.category(ch) not in ('Cc', 'Cf')
    )
    return strs.replace(' ', '_')

Expand All @@ -62,14 +88,42 @@ def dic(info):
#获取与修改下载列表
def downlist(set='null'):
    """Read or replace the persisted download list (``downlist.obj``).

    Args:
        set: the sentinel string ``'null'`` (default) to read the list, or
            the new list to persist. (The name shadows the builtin but is
            kept because it is part of the public signature.)

    Returns:
        When reading: the stored list, or ``[]`` if the file is missing,
        empty, or unreadable (in which case it is re-created empty).
        When writing: ``None``.
    """
    if set != 'null':
        with open('downlist.obj', 'wb') as f:
            pickle.dump(set, f)
        return None

    # NOTE: pickle must only ever be used on files this program wrote itself;
    # never point this at data from an untrusted source.
    if os.path.exists('downlist.obj') and os.path.getsize('downlist.obj') > 0:
        try:
            with open('downlist.obj', 'rb') as f:
                return pickle.load(f)
        except (pickle.UnpicklingError, EOFError):
            # A write interrupted half-way leaves a truncated pickle; treat it
            # like the already-tolerated zero-length file instead of crashing.
            print('downlist.obj损坏,已重置下载列表')

    # Missing, empty or corrupt: (re)initialise the store with an empty list.
    with open('downlist.obj', 'wb') as f:
        pickle.dump([], f)
    return []

#获取与修改完成列表
def finlist(set='null'):
    """Read or replace the persisted finished-task list (``finlist.obj``).

    Args:
        set: the sentinel string ``'null'`` (default) to read the list, or
            the new list to persist. (The name shadows the builtin but is
            kept because it is part of the public signature.)

    Returns:
        When reading: the stored list, or ``[]`` if the file is missing,
        empty, or unreadable (in which case it is re-created empty).
        When writing: ``None``.
    """
    if set != 'null':
        with open('finlist.obj', 'wb') as f:
            pickle.dump(set, f)
        return None

    # NOTE: pickle must only ever be used on files this program wrote itself;
    # never point this at data from an untrusted source.
    if os.path.exists('finlist.obj') and os.path.getsize('finlist.obj') > 0:
        try:
            with open('finlist.obj', 'rb') as f:
                return pickle.load(f)
        except (pickle.UnpicklingError, EOFError):
            # A write interrupted half-way leaves a truncated pickle; treat it
            # like the already-tolerated zero-length file instead of crashing.
            print('finlist.obj损坏,已重置完成列表')

    # Missing, empty or corrupt: (re)initialise the store with an empty list.
    with open('finlist.obj', 'wb') as f:
        pickle.dump([], f)
    return []

#删除下载任务
def del_downlist(info):
    """Remove the task matching *info* from the persisted download list.

    A stored task is dropped when both its ``'targetblogid'`` and its
    ``'postid'`` equal the corresponding values in *info*; every other task
    is kept in its original order and written back via ``downlist``.

    Args:
        info: mapping with ``'targetblogid'`` and ``'postid'`` keys.
    """
    remaining = [
        task
        for task in downlist()
        if not (
            task['targetblogid'] == info['targetblogid']
            and task['postid'] == info['postid']
        )
    ]
    # Persist the filtered list in place of the old one.
    downlist(remaining)

def get_set():
with open('set.json','r') as f:
Expand Down Expand Up @@ -140,7 +194,7 @@ def down_img(img,save_dir):
with open(Path(save_dir+'/url.txt'),'w') as f:
f.write(sep.join(img))
for url in tqdm(img,desc='图片下载中:',unit='img'):
os.system('wget -N -nv '+url+' -P '+save_dir)
wget.download(url,save_dir)
#os.system('aria2c --quiet true -j 10 --continue=true --dir="'+str(Path(save_dir))+'" -i "'+str(Path(save_dir+'/url.txt'))+'"')
return

Expand Down Expand Up @@ -196,6 +250,47 @@ def lofter_info(data):
print(info['title'])
return info

#lofter客户端api解析引擎
def lofter_api(targetblogid: int, postid: int) -> dict:
    """Fetch one post through the LOFTER Android client "oldapi" endpoint.

    Args:
        targetblogid: numeric id of the blog that owns the post.
        postid: numeric id of the post.

    Returns:
        A dict with the keys:
            targetblogid, postid -- the arguments, echoed back
            status  -- API status taken from ``meta.status``
            msg     -- API status message taken from ``meta.msg``
            title   -- post title, or a UTC timestamp when the post has none
            writer  -- ``{'name': <nickname>, 'img': <avatar url>}``
            info    -- post content
            type    -- post type (the original author notes: 1 = text,
                       2 = contains images, 3 = music?)
            img     -- list of raw image URLs (only filled when type == 2)

    Raises:
        KeyError / IndexError: if the response lacks the expected structure
            (for example when the API reports an error and returns no posts).
    """
    url = (
        'https://api.lofter.com/oldapi/post/detail.api'
        '?product=lofter-android-7.3.4'
        f'&targetblogid={targetblogid}'
        '&supportposttypes=1,2,3,4,5,6&offset=0&postdigestnew=1'
        f'&postid={postid}'
        f'&blogId={targetblogid}'
        '&checkpwd=1&needgetpoststat=1'
    )
    # NOTE(review): api_post appears to return the string 'null' on HTTP 404,
    # in which case .json() fails here -- confirm how callers expect that
    # situation to be reported.
    json_answer = api_post(url).json()

    # Every per-post field lives under the same node; resolve it once.
    post = json_answer['response']['posts'][0]['post']
    blog = post['blogInfo']

    # Some post kinds carry no title (or an empty one); fall back to a
    # timestamp so there is always a usable name.
    title = post.get('title')
    if not title:
        title = datetime.utcnow().strftime('%Y-%m-%d %H-%M-%S %f')

    info = {
        'targetblogid': targetblogid,
        'postid': postid,
        'status': json_answer['meta']['status'],
        'msg': json_answer['meta']['msg'],
        'title': title,
        'writer': {
            'name': blog['blogNickName'],
            'img': blog['bigAvaImg'],
        },
        'info': post['content'],
        'type': post['type'],
        'img': [],
    }
    if info['type'] == 2:
        # photoLinks is itself a JSON-encoded string holding a list of objects.
        info['img'] = [link['raw'] for link in json.loads(post['photoLinks'])]
    return info

#提取页面文章列表
def lofter_post_list(url):
page = 0
Expand All @@ -221,7 +316,7 @@ def lofter_post_list(url):
print('共提取'+str(len(post))+'个文章')
return post

def lofter_down(data,local_dir):
def down(data,local_dir):
text = clean_1(data['html']) #清洗文本
#检查是否有有效文本信息
test = text['txt'].replace('\n','')
Expand Down Expand Up @@ -278,4 +373,5 @@ def lofter_down(data,local_dir):
save_txt(Path(save_dir+'/index.txt'),data['txt'])
print('索引链接创建完成')
print('本地化完成')
return
return

Loading

0 comments on commit 9b3b039

Please sign in to comment.