10
10
from .utils import connect_db
11
11
12
12
13
- def connect_by_nature_num ():
13
+ def connect_by_nature_num (db ):
14
14
db .run ("""
15
15
UPDATE textes_versions
16
16
SET texte_id = (
@@ -30,7 +30,7 @@ def connect_by_nature_num():
30
30
print ('connected %i rows of textes_versions based on (nature, num)' % db .changes ())
31
31
32
32
33
- def connect_by_nor ():
33
+ def connect_by_nor (db ):
34
34
db .run ("""
35
35
CREATE TEMP TABLE texte_by_nor AS
36
36
SELECT nor, min(texte_id)
@@ -61,7 +61,7 @@ def connect_by_nor():
61
61
db .run ("DROP TABLE texte_by_nor" )
62
62
63
63
64
- def connect_by_titrefull_s ():
64
+ def connect_by_titrefull_s (db ):
65
65
db .run ("""
66
66
CREATE TEMP TABLE texte_by_titrefull_s AS
67
67
SELECT DISTINCT titrefull_s, texte_id
@@ -87,7 +87,7 @@ def connect_by_titrefull_s():
87
87
db .run ("DROP TABLE texte_by_titrefull_s" )
88
88
89
89
90
- def factorize_by (key ):
90
+ def factorize_by (db , key ):
91
91
duplicates = db .all ("""
92
92
SELECT min(nature), {0}, group_concat(texte_id)
93
93
FROM textes_versions
@@ -116,8 +116,8 @@ def factorize_by(key):
116
116
print ('factorized %i duplicates into %i uniques based on %s' % (total , factorized , key ))
117
117
118
118
119
- def main ():
120
- connect_by_nature_num ()
119
+ def main (db ):
120
+ connect_by_nature_num (db )
121
121
122
122
db .run ("""
123
123
INSERT INTO textes (nature, num)
@@ -131,9 +131,9 @@ def main():
131
131
""" )
132
132
print ('inserted %i rows in textes based on (nature, num)' % db .changes ())
133
133
134
- connect_by_nature_num ()
135
- connect_by_nor ()
136
- connect_by_titrefull_s ()
134
+ connect_by_nature_num (db )
135
+ connect_by_nor (db )
136
+ connect_by_titrefull_s (db )
137
137
138
138
db .run ("""
139
139
INSERT INTO textes (nature, nor)
@@ -163,8 +163,8 @@ def main():
163
163
""" )
164
164
print ('connected %i rows of textes_versions based on nor' % db .changes ())
165
165
166
- factorize_by ('titrefull_s' )
167
- connect_by_titrefull_s ()
166
+ factorize_by (db , 'titrefull_s' )
167
+ connect_by_titrefull_s (db )
168
168
169
169
db .run ("""
170
170
INSERT INTO textes (nature, titrefull_s)
@@ -191,7 +191,7 @@ def main():
191
191
""" )
192
192
print ('connected %i rows of textes_versions based on titrefull_s' % db .changes ())
193
193
194
- factorize_by ('cid' )
194
+ factorize_by (db , 'cid' )
195
195
196
196
xml = etree .XMLParser (remove_blank_text = True )
197
197
q = db .all ("""
@@ -253,6 +253,6 @@ def main():
253
253
print ("> Normalisation des titres..." )
254
254
normalize (db )
255
255
print ("> Factorisation des textes..." )
256
- main ()
256
+ main (db )
257
257
except KeyboardInterrupt :
258
258
pass
0 commit comments