-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMain.py
73 lines (57 loc) · 1.92 KB
/
Main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
__author__ = 'glcsnz123'
# _*_ coding: utf-8 _*_
import re
import urllib, urllib2
import os
from Globals import Settings, MyDeamon
import logging
import sys
# Matches the opening of the main comic <img> tag up to and including the
# closing quote of its src attribute.  The value is matched with [^'"]+ (not a
# greedy .+) so the match stops at the first closing quote instead of running
# on to the last quote character on the line.  No capture groups are used, so
# findall() keeps returning whole-match strings.
_patten = re.compile(r"""<img id="main-comic" src=['"][^'"]+['"]""")
def img_download(url, path):
    """Download the resource at ``url`` and store it in the local file ``path``.

    This is a thin wrapper around ``urllib.urlretrieve``; it has no error
    handling of its own, so whatever that call raises reaches the caller.
    Nothing is returned.
    """
    urllib.urlretrieve(url, filename=path)
def get_url_from_key(key):
    """Extract the value of the ``src`` attribute from a matched <img> snippet.

    ``key`` is a fragment such as ``<img id="main-comic" src="//host/a.png"``.
    The quoted ``src`` value is returned without its surrounding quotes.

    The attribute is located explicitly rather than by taking the last
    space-separated token, so the result stays correct when other attributes
    follow ``src`` or when the URL itself contains a space.  If no well-formed
    quoted ``src`` attribute is present, the historical behaviour is kept:
    the last space-separated token with its first five characters and its
    last character removed.
    """
    src_attr = re.search(r"""src=(['"])(.*?)\1""", key)
    if src_attr is not None:
        return src_attr.group(2)
    # Legacy fallback: assumes the last token looks like  src="<value>"
    return key.split(" ")[-1][5:-1]
def _read_count_no():
    """Return the comic number stored on the first line of the ``cnt`` file.

    ``cnt`` is opened relative to the current working directory.

    Raises:
        IOError: if ``cnt`` cannot be opened.
        ValueError: if its first line is not a valid integer.
    """
    # Use a context manager so the handle is closed deterministically instead
    # of being left open until garbage collection.
    with open("cnt") as counter_file:
        first_line = counter_file.readline()
    return int(first_line)
def _write_count_no(cnt_no):
    """Overwrite the ``cnt`` file with the string form of ``cnt_no``.

    ``cnt`` is created (or truncated) relative to the current working
    directory.  No trailing newline is written.
    """
    # The context manager closes the file even if write() raises, which the
    # explicit open/write/close sequence did not guarantee.
    with open("cnt", 'w') as counter_file:
        counter_file.write(str(cnt_no))
def get_url(url="http://explosm.net/comics/1001/"):
    """Return the image URL of the main comic found on the page at ``url``.

    The page is read line by line and each line is tested against the
    module-level ``_patten`` regex; the first matching line is handed to
    ``get_url_from_key`` to obtain the ``src`` value.  A protocol-relative
    value (``//host/...``) is prefixed with ``http:``; any other value is
    returned unchanged.

    Returns:
        The image URL, or ``""`` when the server answers with an HTTP error
        or the page contains no matching <img> tag.

    Only ``urllib2.HTTPError`` is handled here; other failures raised by
    ``urlopen`` propagate to the caller.
    """
    try:
        req = urllib2.urlopen(url)
    except urllib2.HTTPError as e:
        logging.warning("\t[HTTP ERROR] %s", e.msg)
        return ""
    logging.info("\treading web page...")
    try:
        # readline() returns "" at end of stream; stopping there prevents the
        # endless loop that occurred when the page had no matching tag.
        for html in iter(req.readline, ""):
            found = _patten.search(html)
            if found is None:
                continue
            img_src = get_url_from_key(found.group(0))
            # Only protocol-relative URLs need a scheme; blindly prefixing
            # would corrupt a src that is already absolute.
            if img_src.startswith("//"):
                return "http:" + img_src
            return img_src
    finally:
        # Always release the connection, whether or not a match was found.
        req.close()
    return ""
def main_task():
    """Download comics from the saved counter up to page 9999.

    The starting page number comes from ``_read_count_no()``.  For each page
    number the image URL is resolved with ``get_url``; pages that yield no URL
    are skipped.  Before each download the current number is persisted with
    ``_write_count_no`` so a later run resumes from that page.  Images are
    saved as ``comics/<number>.<extension>``.

    Any exception raised while handling a single page is logged and the loop
    moves on to the next page.
    """
    comics_path = "comics"
    # The download writes straight into this directory, so make sure it exists
    # rather than failing on every single comic.
    if not os.path.isdir(comics_path):
        os.makedirs(comics_path)
    start_no = _read_count_no()
    for i in range(start_no, 10000):
        try:
            logging.info("downloading comics [%d]" % i)
            img_url = get_url("http://explosm.net/comics/%d/" % i)
            if img_url == '':
                logging.info("\tpage [%d] is not exits\n" % i)
                continue
            _write_count_no(i)
            # Reuse the remote file's extension for the local file name.
            extension = img_url.split('.')[-1]
            file_path = os.path.join(comics_path, ".".join([str(i), extension]))
            logging.info("".join(["\t downloading ", img_url.split('/')[-1], " --> ", file_path]))
            img_download(img_url, file_path)
        except Exception as e:
            # Log the exception itself: e.message is deprecated, is often
            # empty, and may not be a string, which would make the log call
            # raise inside the handler.
            logging.warning("[PRCS ERROR]%s", e)
if __name__ == '__main__':
    # With a command-line argument the script is in "BACK" mode and only acts
    # on START (case-insensitive), handing main_task to MyDeamon.daemonize.
    # Without arguments it runs main_task directly ("FRONT" mode).
    if len(sys.argv) >= 2:
        logging.info("***********BACK***********")
        command = sys.argv[1].upper()
        if command == "START":
            MyDeamon.daemonize("", main_task)
    else:
        logging.info("***********FRONT***********")
        main_task()