-
Notifications
You must be signed in to change notification settings - Fork 0
/
util.py
100 lines (77 loc) · 3.1 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python
import xml.etree.ElementTree as etree
from settings import *
import commands
import tempfile
import urllib
import os
import os.path
import logging
# Configure the root logger at import time: records of level DEBUG and above
# are written to /tmp/metadoc.log as "<timestamp> <level> <message>".
# filemode='w' opens the log file for writing rather than appending, so the
# previous contents are truncated when the handler is created.
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(levelname)s %(message)s',
filename='/tmp/metadoc.log',
filemode='w')
def doc_url(doc, lang = 'en', cvs=False, diff = False):
    """Build the URL under which *doc* can be viewed.

    Parameters:
        doc:  object exposing ``meta_info`` (a mapping holding
              ``'file_ja_path'`` and ``'file_en_path'``) and, when *diff*
              is requested, the attributes ``ja_org_rev`` and ``en_cvs_rev``.
        lang: ``'ja'`` selects ``meta_info['file_ja_path']``; any other
              value selects ``meta_info['file_en_path']``.
        cvs:  when true (and *diff* is false) the URL is rooted at
              ``CONFIG['CVS_URL_BASE']`` instead of
              ``CONFIG['GENTOO_URL_BASE']``.
        diff: when true, return a CVS URL carrying ``r1``/``r2`` query
              parameters; this takes precedence over *cvs*.

    Returns:
        The URL as a string.
    """
    path_key = 'file_ja_path' if lang == 'ja' else 'file_en_path'
    relative_path = doc.meta_info[path_key]

    # A diff is always served from the CVS viewer, whatever *cvs* says.
    if diff:
        return (CONFIG['CVS_URL_BASE'] + relative_path
                + '?r1=%s&r2=%s' % (doc.ja_org_rev, doc.en_cvs_rev))

    base_key = 'CVS_URL_BASE' if cvs else 'GENTOO_URL_BASE'
    return CONFIG[base_key] + relative_path
def docdiff_url(doc):
    """Render an HTML docdiff between two CVS revisions of *doc*.

    The file named by ``doc.meta_info['file_en_path']`` is downloaded from
    the Gentoo ViewCVS server at revisions ``doc.ja_org_rev`` and
    ``doc.en_cvs_rev``, the two copies are compared with
    ``/usr/bin/docdiff --utf8 --html`` and the output is written to
    ``CONFIG['DIFF_DIR'] + <file_id>.<ja_org_rev>_<en_cvs_rev>.html``.

    Parameters:
        doc: object exposing ``meta_info`` (with ``'file_en_path'`` and
             ``'file_id'``), ``ja_org_rev`` and ``en_cvs_rev``.

    Returns:
        The base name of the HTML report. Writing the report is
        best-effort: a failure is logged and the name is still returned.
        ``None`` is returned only when the report path itself could not
        be built.

    Raises:
        KeyError: if ``meta_info`` lacks ``'file_en_path'`` or ``'file_id'``.
        Any error raised by the downloads propagates to the caller; the
        temporary files are removed in every case.
    """
    file_path = doc.meta_info['file_en_path']
    # Looked up before any work is done so a missing key fails fast instead
    # of surfacing later from inside the error handler.
    file_id = doc.meta_info['file_id']

    # Only the query string is a format template; formatting the whole URL
    # would break on a file path that happens to contain '%'.
    url_prefix = "http://sources.gentoo.org/cgi-bin/viewcvs.cgi/*checkout*/xml/htdocs" + file_path
    url_query = "?rev=%s&hideattic=0&root=gentoo&content-type=text/plain"

    temp_paths = []
    diff_html_path = None
    try:
        for rev in (doc.ja_org_rev, doc.en_cvs_rev):
            # mkstemp() hands back an *open* descriptor; close it right away
            # because only the path is needed, otherwise every call leaks one.
            fd, temp_path = tempfile.mkstemp()
            os.close(fd)
            temp_paths.append(temp_path)
            urllib.urlretrieve(url_prefix + url_query % (rev,), temp_path)

        old_path, new_path = temp_paths
        docdiff_command = '/usr/bin/docdiff --utf8 --html %s %s' % (old_path, new_path)
        diff_html = commands.getoutput(docdiff_command)

        try:
            diff_html_path = (CONFIG['DIFF_DIR'] + file_id + '.'
                              + doc.ja_org_rev + '_' + doc.en_cvs_rev + '.html')
            with open(diff_html_path, 'w') as html:
                html.write(diff_html)
        except Exception:
            # Best-effort: a report that cannot be written must not abort the
            # caller, but the reason is recorded with its traceback.
            logging.warning('docdiff_url: warning', exc_info=True)
            logging.warning(str(doc))
            logging.warning(file_id)
    finally:
        # Always clean up the downloads, even when a download or diff failed.
        for temp_path in temp_paths:
            try:
                os.remove(temp_path)
            except OSError:
                logging.warning('docdiff_url: could not remove %s', temp_path,
                                exc_info=True)

    if diff_html_path is None:
        return None
    return os.path.basename(diff_html_path)
def guess_categories(info_list):
    """Propagate category membership from including documents to included ones.

    Each entry of *info_list* is a dict with at least ``'file_en_path'``,
    ``'en_memberof'`` and ``'ja_memberof'``. For every entry the XML file at
    ``CONFIG['BASE_PATH'] + entry['file_en_path']`` is parsed and its
    ``<include href="...">`` elements are collected. Every entry of
    *info_list* whose file base name equals such an ``href`` receives the
    including entry's ``en_memberof`` / ``ja_memberof`` items and an
    ``'order'`` key holding the position of the ``<include>`` element.

    Processing is best-effort per entry: if anything goes wrong for one
    entry (unreadable or malformed XML, missing key, ...) a warning is
    logged and the remaining entries are still processed.

    Parameters:
        info_list: list of meta-info dicts; modified in place.

    Returns:
        The same *info_list* object.
    """
    for info in info_list:
        try:
            tree = etree.parse(CONFIG['BASE_PATH'] + info['file_en_path'])
            # NOTE(review): the original indentation was lost; 'order' is
            # assumed to advance once per <include> element, i.e. it is the
            # element's index in document order - confirm against history.
            for order, include in enumerate(tree.findall('.//include')):
                included_name = include.get('href')
                for candidate in info_list:
                    # Match the meta-info of the included file by base name.
                    if included_name == os.path.basename(candidate['file_en_path']):
                        candidate['en_memberof'].update(info['en_memberof'])
                        candidate['ja_memberof'].update(info['ja_memberof'])
                        candidate['order'] = order
        except Exception:
            # Still skip the entry, but leave a trace and let
            # KeyboardInterrupt / SystemExit through.
            logging.warning('guess_categories: skipping entry', exc_info=True)
    return info_list
def guess_categories_coverpage(hb_list):
    """Tag cover pages with their own category, propagate it, then sort.

    Steps:
      1. Every entry whose ``'file_id'`` is listed in ``CONFIG['COVERS']``
         gets ``{file_id: CONFIG[file_id][0]}`` merged into its
         ``'en_memberof'`` and ``{file_id: CONFIG[file_id][1]}`` merged into
         its ``'ja_memberof'``.
      2. ``guess_categories`` is run over the list.
      3. Entries still lacking an ``'order'`` key get ``-1`` so they sort
         first, and the list is sorted in place by ascending ``'order'``.

    Parameters:
        hb_list: list of meta-info dicts; modified and sorted in place.

    Returns:
        The sorted list.
    """
    for info in hb_list:
        f_id = info['file_id']
        if f_id in CONFIG['COVERS']:
            info['en_memberof'].update({f_id: CONFIG[f_id][0]})
            info['ja_memberof'].update({f_id: CONFIG[f_id][1]})

    hb_list = guess_categories(hb_list)

    # Entries no document includes have no position; give them -1.
    for info in hb_list:
        if 'order' not in info:
            info['order'] = -1

    # key= yields the same stable ordering as the former cmp= comparator,
    # and unlike cmp= it is also accepted by Python 3.
    hb_list.sort(key=lambda entry: entry['order'])
    return hb_list