-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathestLatgene.py
226 lines (182 loc) · 7.57 KB
/
estLatgene.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
import pywikibot, re, os, time, sys, json
import toolforge
from datetime import date, datetime, timedelta, timezone
from pytz import timezone
from customFuncs import get_quarry
cached = False
cachedDB = False
#os.chdir(r'projects/estlat')
conn = toolforge.connect('enwiki_p','analytics')
#connLabs = toolforge.connect_tools('s53143__estlat_p')
#cursor1 = connLabs.cursor()
null = ''
utc_timezone = timezone("UTC")
lva_timezone = timezone("Europe/Riga")
def encode_if_necessary(b):
if type(b) is bytes:
return b.decode('utf8')
return b
def run_query(query,connection = conn):
#query = query.encode('utf-8')
#print(query)
try:
cursor = connection.cursor()
cursor.execute(query)
rows = cursor.fetchall()
except KeyboardInterrupt:
sys.exit()
return rows
'''
#
'''
SQL_enwiki = """select count(l.ll_lang) as langs, p.page_title, (select pp.pp_value from page_props pp where pp.pp_page=ll_from and pp_propname='wikibase_item') as wd
from langlinks l
join page p on p.page_id=l.ll_from
where p.page_namespace=0 and p.page_title in (select distinct pla.page_title
from categorylinks cla
join page pla on cla.cl_from=pla.page_id# and cla.tl_from_
where pla.page_namespace=1 and cla.cl_type="page" and cla.cl_to="WikiProject_{}_articles"
)
#where pla.page_namespace=1 and cla.cl_type="page" and cla.cl_to="WikiProject_Russia_articles")
and l.ll_from not in (select m.ll_from from langlinks m where m.ll_lang="{}")
#and l.ll_from not in (select m.ll_from from langlinks m where m.ll_lang="en")
group by l.ll_from
#having count(l.ll_lang)>20
order by count(l.ll_lang) desc
limit 1500;"""
SQL_frwiki = """select count(l.ll_lang) as langs, p.page_title, (select m2.ll_title from langlinks m2 where m2.ll_from=l.ll_from and m2.ll_lang="en") as en, (select pp.pp_value from page_props pp where pp.pp_page=ll_from and pp_propname='wikibase_item') as wd
from langlinks l
join page p on p.page_id=l.ll_from
and p.page_namespace=0 and not exists (select * from langlinks m where m.ll_from=l.ll_from and m.ll_lang="{}")
and exists (select * from categorylinks cla where cla.cl_type="page" and l.ll_from=cla.cl_from
and cla.cl_to="Portail:{}/Articles_liés"
)
group by l.ll_from
order by count(l.ll_lang) desc
limit 1500;"""
SQL_ruwiki = """select count(l.ll_lang) as langs, p.page_title, (select m2.ll_title from langlinks m2 where m2.ll_from=l.ll_from and m2.ll_lang="en") as en, (select pp.pp_value from page_props pp where pp.pp_page=ll_from and pp_propname='wikibase_item') as wd
from langlinks l
join page p on p.page_id=l.ll_from
where p.page_namespace=0 and p.page_title in (select distinct pla.page_title
from categorylinks cla
join page pla on cla.cl_from=pla.page_id# and cla.tl_from_
where pla.page_namespace=1 and cla.cl_type="page" and cla.cl_to="Статьи_проекта_{}"
)
#where pla.page_namespace=1 and cla.cl_type="page" and cla.cl_to="WikiProject_Russia_articles")
and l.ll_from not in (select m.ll_from from langlinks m where m.ll_lang="{}")
#and l.ll_from not in (select m.ll_from from langlinks m where m.ll_lang="en")
group by l.ll_from
#having count(l.ll_lang)>20
order by count(l.ll_lang) desc
limit 1500;"""
def encode_all_items(row):
return [encode_if_necessary(f) for f in row]
def utc_to_local(utc_dt):
return utc_timezone.localize(utc_dt).astimezone(lva_timezone)
#
def enwiki_query(countryname,country,language):
printInfo('en',countryname,country,language)
if cached:
query_res = get_quarry('30043','1')
results = {f[2]:[f[0], ['en',f[1].replace('_',' ')]] for f in query_res}
final2 = sort_list(results)
saveToDB(final2,language,country,'en')
return results
else:
query_res = run_query(SQL_enwiki.format(country,language),toolforge.connect('enwiki_p','analytics'))
result_json = [encode_all_items(f) for f in query_res]
results = {f[2]:[f[0], ['en',f[1].replace('_',' ')]] for f in result_json}
final2 = sort_list(results)
saveToDB(final2,language,countryname,'en')
return results
#
def ruwiki_query(countryname,country,language):
printInfo('ru',countryname,country,language)
if cached:
return {}
else:
query_res = run_query(SQL_ruwiki.format(country,language),toolforge.connect('ruwiki_p','analytics'))
result_json = [encode_all_items(f) for f in query_res]
frwikidata = {}
for line in result_json:
iws,fr,en,wd = line
if en is None:
frwikidata.update({wd:[iws,['ru',fr.replace('_',' ')]]})
else:
frwikidata.update({wd:[iws,['en',en]]})
final2 = sort_list(frwikidata)
saveToDB(final2,language,countryname,'ru')
return frwikidata
#
sql_update = 'UPDATE `lists` SET jsondata=%s, last_upd=%s where wiki=%s and name=%s and list_type=%s and source=%s'
def saveToDB(jsonData,wiki,countryname,source):
curr_time = utc_to_local(datetime.utcnow())
currTime = "{0:%Y%m%d%H%M%S}".format(curr_time)
#jsondata=%s, last_upd=%s where wiki=%s and name=%s and list_type=%s'
if cachedDB:
with open('cats datggfgdfga2{}{}.txt'.format(wiki,source), "w", encoding='utf-8') as fileW:
fileW.write(str(jsonData))
else:
connLabs = toolforge.connect_tools('s53143__estlat_p')
cursor1 = connLabs.cursor()
cursor1.execute(sql_update, (str(json.dumps(jsonData)),currTime,wiki,countryname,"list",source))
connLabs.commit()
connLabs.close()
#
def printInfo(FUNC,countryname,country,language):
pywikibot.output("FUNC: {}\tcountryname: {}\tcountry: {}\tlanguage: {}".format(FUNC,countryname,country,language))
def frwiki_query(countryname,country,language):
printInfo('fr',countryname,country,language)
if cached:
query_res = get_quarry('30043','0')
frwikidata = {}
for line in query_res:
iws,fr,en,wd = line
if en is None:
frwikidata.update({wd:[iws,['fr',fr.replace('_',' ')]]})
else:
frwikidata.update({wd:[iws,['en',en]]})
final2 = sort_list(frwikidata)
saveToDB(final2,language,country,'fr')
return frwikidata
else:
query_res = run_query(SQL_frwiki.format(language,country),toolforge.connect('frwiki_p','analytics'))
result_json = [encode_all_items(f) for f in query_res]
frwikidata = {}
for line in result_json:
iws,fr,en,wd = line
if en is None:
frwikidata.update({wd:[iws,['fr',fr.replace('_',' ')]]})
else:
frwikidata.update({wd:[iws,['en',en]]})
final2 = sort_list(frwikidata)
saveToDB(final2,language,countryname,'fr')
return frwikidata
#
def sort_list(thelist):
final = []
for one in thelist:
thisdata = thelist[one]
final.append([one,thisdata[0],thisdata[1]])
final2 = sorted(final, key=lambda x: -int(x[1]))
return final2
def main():
languages = ['lv','et']
countries = {'lv':{'en':'Latvia','fr':'Lettonie','ru':'Латвия'},'et':{'en':'Estonia','fr':'Estonie','ru':'Эстония'}}
for country in countries:#kuras valsts saraksts tiek veidots
if country=='et': continue#Latvija ir novembrī, to vēlāk
countryname = 'Latvia'
countrydata = countries[country]
for wiki in languages:#kurai wiki šis saraksts domāts
countryres = {}
countryres.update(frwiki_query(countryname,countrydata['fr'],wiki))
#countryres.update(enwiki_query(countryname,countrydata['en'],wiki))
countryres.update(ruwiki_query(countryname,countrydata['ru'],wiki))
final2 = sort_list(countryres)
#saveToDB(final2,wiki,countryname,'all')
#connLabs.close()
conn.close()
pywikibot.output('done')
#
#
main()