
Commit

Add proxy services
pjialin committed Aug 16, 2019
1 parent f9036b7 commit ad4b7fc
Showing 4 changed files with 103 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/app/main.py
@@ -66,7 +66,7 @@ class AppEnvType:
DEFAULT_STATS_CHECK_INTERVAL = 10  # IP statistics interval

DEFAULT_REQUEST_TIME_OUT = 5
-DEFAULT_REQUEST_CHECK_TIME_OUT = 3
+DEFAULT_REQUEST_CHECK_TIME_OUT = 5

# Rate
RE_PUSH_TO_CHECK_POOL_RATE = 0.6  # Skip this push task if the total IP count > pool size * Rate
34 changes: 34 additions & 0 deletions src/sites/github_proxy_list.py
@@ -0,0 +1,34 @@
from src.app.ip_get import IPGet, SiteResponse
from src.lib.structs import SiteData, SiteResponseData

key = 'github_proxy_list'


@IPGet.config(key)
def config():
site = SiteData()
site.name = 'Github proxy list'
site.pages = ['https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt']
return site


@IPGet.parse(key)
def parse(resp: SiteResponse):
import re
ips = re.findall(r'(?:\d{1,3}\.){3}\d{1,3}:\d+', resp.text)
for ip in ips:
try:
item = ip.split(':')
res = SiteResponseData()
res.ip = item[0]
res.port = item[1]
yield res
except Exception:
continue


if __name__ == '__main__':
from src.lib.func import run_until_complete

runner = IPGet.test_crawl(key)
run_until_complete(runner)
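
All three new site parsers extract proxies with the same ip:port regular expression. As a standalone illustration (a minimal sketch, not part of this commit, using only Python's standard library and made-up sample text), the pattern behaves like this:

import re

# Same pattern as the parse() functions in this commit: four dotted octet groups, a colon, then a port.
IP_PORT_PATTERN = r'(?:\d{1,3}\.){3}\d{1,3}:\d+'

sample = 'US-N 192.0.2.1:8080 trailing metadata\n198.51.100.7:3128\nnot a proxy line\n'

for hit in re.findall(IP_PORT_PATTERN, sample):
    host, port = hit.split(':')
    print(host, port)

# Prints:
# 192.0.2.1 8080
# 198.51.100.7 3128

Note the pattern also accepts octets above 255 (e.g. 999.999.999.999:80), so it is a loose filter rather than strict IP validation.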
34 changes: 34 additions & 0 deletions src/sites/proxy_daily.py
@@ -0,0 +1,34 @@
from src.app.ip_get import IPGet, SiteResponse
from src.lib.structs import SiteData, SiteResponseData

key = 'proxy_daily'


@IPGet.config(key)
def config():
site = SiteData()
site.name = 'Proxy daily'
site.pages = ['https://proxy-daily.com/']
return site


@IPGet.parse(key)
def parse(resp: SiteResponse):
import re
ips = re.findall(r'(?:\d{1,3}\.){3}\d{1,3}:\d+', resp.text)
for ip in ips:
try:
item = ip.split(':')
res = SiteResponseData()
res.ip = item[0]
res.port = item[1]
yield res
except Exception:
continue


if __name__ == '__main__':
from src.lib.func import run_until_complete

runner = IPGet.test_crawl(key)
run_until_complete(runner)
34 changes: 34 additions & 0 deletions src/sites/spys_me.py
@@ -0,0 +1,34 @@
from src.app.ip_get import IPGet, SiteResponse
from src.lib.structs import SiteData, SiteResponseData

key = 'spysme'


@IPGet.config(key)
def config():
site = SiteData()
site.name = 'Spys.me'
site.pages = ['http://spys.me/proxy.txt']
return site


@IPGet.parse(key)
def parse(resp: SiteResponse):
import re
ips = re.findall(r'(?:\d{1,3}\.){3}\d{1,3}:\d+', resp.text)
for ip in ips:
try:
item = ip.split(':')
res = SiteResponseData()
res.ip = item[0]
res.port = item[1]
yield res
except Exception:
continue


if __name__ == '__main__':
from src.lib.func import run_until_complete

runner = IPGet.test_crawl(key)
run_until_complete(runner)
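
The parse() functions in github_proxy_list.py, proxy_daily.py and spys_me.py are identical apart from their keys, so the regex handling could live in one place. A rough sketch of such a shared helper, assuming SiteResponseData keeps its ip/port attributes (the helper name and its location are hypothetical, not introduced by this commit):

import re
from typing import Iterator

from src.lib.structs import SiteResponseData

_IP_PORT_RE = re.compile(r'(?:\d{1,3}\.){3}\d{1,3}:\d+')


def iter_ip_port(text: str) -> Iterator[SiteResponseData]:
    # Hypothetical helper: yield one SiteResponseData per ip:port match in the page text.
    for hit in _IP_PORT_RE.findall(text):
        host, port = hit.split(':')
        res = SiteResponseData()
        res.ip = host
        res.port = port
        yield res

Each site's parse() could then reduce to yield from iter_ip_port(resp.text).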
