@@ -85,6 +85,13 @@ class BookwormSQLDatabase:
    """

    def __init__(self,dbname=None,variableFile="files/metadata/jsoncatalog_derived.txt"):
+        """
+        You can initialize it with a database name; otherwise it defaults to finding a
+        Bookworm configuration file.
+
+        It may also be initialized with a set of metadata. This is a little wonky, and may
+        be deprecated in favor of a cleaner interface.
+        """
        config = ConfigParser.ConfigParser(allow_no_value=True)
        config.read(["~/.my.cnf","/etc/my.cnf","/etc/mysql/my.cnf","bookworm.cnf"])
        if dbname == None:
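Not part of the diff, but for orientation: a minimal sketch of the two initialization paths the new docstring describes. The import path and database name below are placeholders, not taken from this commit.

# Sketch only; the module path "bookworm.CreateDatabase" and the dbname are assumptions.
from bookworm.CreateDatabase import BookwormSQLDatabase

# Explicit database name, default metadata file:
bw = BookwormSQLDatabase(dbname="my_bookworm")

# No name given: falls back to the Bookworm configuration files
# (~/.my.cnf, /etc/my.cnf, /etc/mysql/my.cnf, bookworm.cnf).
bw_default = BookwormSQLDatabase()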
@@ -98,8 +105,12 @@ def __init__(self,dbname=None,variableFile="files/metadata/jsoncatalog_derived.t
        self.setVariables(originFile=variableFile)

    def grantPrivileges(self):
-        #Grants select-only privileges to a non-admin mysql user for the API to
-        #query with (safer).
+        """
+        Grants select-only privileges to a non-admin mysql user for the API to
+        query with (safer).
+
+        The username for these privileges is pulled from the bookworm.cnf file.
+        """
        config = ConfigParser.ConfigParser(allow_no_value=True)
        config.read(["~/.my.cnf","/etc/my.cnf","/etc/mysql/my.cnf","bookworm.cnf"])
        username = config.get("client","user")
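For context (not shown in this hunk): the config lookup above follows the same pattern as the initializer, and the select-only grant it feeds would look roughly like the sketch below. The GRANT statement is an assumed illustration, not code from this commit.

# Sketch only; the GRANT shown in the comment is an illustration of a
# select-only privilege, not the statement this method actually issues.
import ConfigParser

config = ConfigParser.ConfigParser(allow_no_value=True)
config.read(["~/.my.cnf","/etc/my.cnf","/etc/mysql/my.cnf","bookworm.cnf"])
username = config.get("client","user")

# e.g. GRANT SELECT ON my_bookworm.* TO 'username'@'localhost';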
@@ -110,6 +121,14 @@ def setVariables(self,originFile,anchorField="bookid",jsonDefinition="files/meta
        self.variableSet = variableSet(originFile=originFile, anchorField=anchorField, jsonDefinition=jsonDefinition, db=self.db)

    def importNewFile(self,originFile,anchorField,jsonDefinition):
+        """
+        Add additional metadata from a source collection of json-formatted rows.
+        originFile is the filename of the new metadata, in the same input format
+        as the original jsoncatalog.txt.
+        anchorField is the field in the existing dataset that the new data should be anchored onto.
+        jsonDefinition is the filename of a file in the format of field_descriptions.json
+        describing the new data to ingest.
+        """
        self.setVariables(originFile,anchorField=anchorField,jsonDefinition=jsonDefinition)
        self.variableSet.writeMetadata()
        self.load_book_list()
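A hypothetical call matching the new importNewFile docstring, on an already-initialized BookwormSQLDatabase instance (bw, as in the earlier sketch); every filename here is a placeholder invented for illustration.

# All paths below are made-up examples, not files from this repository.
bw.importNewFile("files/metadata/extra_genres.txt",
                 anchorField="bookid",
                 jsonDefinition="files/metadata/extra_genres_descriptions.json")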
@@ -160,8 +179,8 @@ def load_word_list(self):

    def load_book_list(self):
        """
-        Loads in the tables that have already been created by calling
-        `Bookworm.variableSet.writeMetadata()`
+        Loads in the tables that have already been created by a previous
+        call to `Bookworm.variableSet.writeMetadata()`
        """
        self.variableSet.loadMetadata()

@@ -176,7 +195,8 @@ def create_unigram_book_counts(self):
        db.query("ALTER TABLE master_bookcounts DISABLE KEYS")
        print "loading data using LOAD DATA LOCAL INFILE"
        for filename in os.listdir("files/texts/encoded/unigrams"):
-            if filename[-4:] != '.txt':
+            if not filename.endswith('.txt'):
+                # Sometimes other files are in there; skip them.
                continue
            try:
                db.query("LOAD DATA LOCAL INFILE 'files/texts/encoded/unigrams/" + filename + "' INTO TABLE master_bookcounts CHARACTER SET utf8 (bookid,wordid,count);")
@@ -236,7 +256,8 @@ def loadVariableDescriptionsIntoDatabase(self):

    def reloadMemoryTables(self,force=False):
        """
-        Checks to see if memory tables need to be repopulated, and then does so if they are empty.
+        Checks to see if memory tables need to be repopulated (by seeing if they are empty)
+        and then does so if necessary.
        """
        existingCreateCodes = self.db.query("SELECT tablename,memoryCode FROM masterTableTable").fetchall();
        for row in existingCreateCodes:
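A rough sketch (an assumption, not code from this commit, written as it might appear inside the method) of the check the revised docstring describes: each masterTableTable row carries the SQL needed to rebuild its memory table, and that code is re-run whenever the table turns up empty or force is set.

# Sketch only; the column names come from the query above, the emptiness
# check and re-run logic are assumed.
for tablename, memoryCode in existingCreateCodes:
    rowcount = self.db.query("SELECT COUNT(*) FROM " + tablename).fetchall()[0][0]
    if force or rowcount == 0:
        self.db.query(memoryCode)  # re-run the stored repopulation code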
@@ -339,6 +360,9 @@ def create_API_settings(self):
        db.query("INSERT INTO API_settings VALUES ('%s');" % addCode)

    def update_Porter_stemming(self): #We use stems occasionally.
+        """
+        Still not executed.
+        """
        print "Updating stems from Porter algorithm..."
        from nltk import PorterStemmer
        stemmer = PorterStemmer()
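For reference, the Porter stemmer imported in this hunk collapses inflected word forms onto a shared stem, for example:

from nltk import PorterStemmer

stemmer = PorterStemmer()
stemmer.stem("running")   # -> "run"
stemmer.stem("stemming")  # -> "stem"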
@@ -354,8 +378,7 @@ def update_Porter_stemming(self): #We use stems occasionally.

    def addCategoricalFromFile(self,filename,unique=False):
        """
-        Useful, but still a bit of a hack--should be a special method of adding a group
-        that automatically creates the json file.
+        No longer used: delete this code block.
        """
        file = open(filename)
        firstTwo = file.readline().split("\t")
@@ -364,7 +387,7 @@ def addCategoricalFromFile(self,filename,unique=False):
        definition = {"field":name,"datatype":"categorical","type":"character","unique":False}

        #Currently the anchortype has to be a MediumInt.
-        #That's extremely inefficient.
+        #That's a little inefficient if joining on a smaller document.
        anchorType = "MEDIUMINT"

        thisField = dataField(definition,