Skip to content

Commit 5cb4601

Browse files
committed
More comments
1 parent 920c0fa commit 5cb4601

File tree

1 file changed

+32
-9
lines changed

1 file changed

+32
-9
lines changed

bookworm/CreateDatabase.py

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,13 @@ class BookwormSQLDatabase:
8585
"""
8686

8787
def __init__(self,dbname=None,variableFile="files/metadata/jsoncatalog_derived.txt"):
88+
"""
89+
You can initialize it with a database name; otherwise it defaults to finding a
90+
Bookworm configuration file.
91+
92+
It also may be initialized with a set of metadata. This is a little wonky, and may
93+
be deprecated in favor of a cleaner interface.
94+
"""
8895
config = ConfigParser.ConfigParser(allow_no_value=True)
8996
config.read(["~/.my.cnf","/etc/my.cnf","/etc/mysql/my.cnf","bookworm.cnf"])
9097
if dbname==None:
@@ -98,8 +105,12 @@ def __init__(self,dbname=None,variableFile="files/metadata/jsoncatalog_derived.t
98105
self.setVariables(originFile=variableFile)
99106

100107
def grantPrivileges(self):
101-
#Grants select-only privileges to a non-admin mysql user for the API to
102-
#query with (safer).
108+
"""
109+
Grants select-only privileges to a non-admin mysql user for the API to
110+
query with (safer).
111+
112+
The username for these privileges is the `user` option of the [client] section in the MySQL/Bookworm config files (e.g. bookworm.cnf).
113+
"""
103114
config = ConfigParser.ConfigParser(allow_no_value=True)
104115
config.read(["~/.my.cnf","/etc/my.cnf","/etc/mysql/my.cnf","bookworm.cnf"])
105116
username=config.get("client","user")
@@ -110,6 +121,14 @@ def setVariables(self,originFile,anchorField="bookid",jsonDefinition="files/meta
110121
self.variableSet = variableSet(originFile=originFile, anchorField=anchorField, jsonDefinition=jsonDefinition,db=self.db)
111122

112123
def importNewFile(self,originFile,anchorField,jsonDefinition):
124+
"""
125+
Add additional metadata from a source collection of json-formatted rows.
126+
originFile is the filename of the new metadata, in the same input format
127+
as the original jsoncatalog.txt.
128+
anchorField is the field in the existing dataset it should be anchored onto;
129+
jsonDefinition is a filename pointing to a file
130+
of the format of field_descriptions.json describing the new data to ingest.
131+
"""
113132
self.setVariables(originFile,anchorField=anchorField,jsonDefinition=jsonDefinition)
114133
self.variableSet.writeMetadata()
115134
self.load_book_list()
@@ -160,8 +179,8 @@ def load_word_list(self):
160179

161180
def load_book_list(self):
162181
"""
163-
Loads in the tables that have already been created by calling
164-
`Bookworm.variableSet.writeMetadata()`
182+
Loads in the tables that have already been created by a previous
183+
call to `Bookworm.variableSet.writeMetadata()`
165184
"""
166185
self.variableSet.loadMetadata()
167186

@@ -176,7 +195,8 @@ def create_unigram_book_counts(self):
176195
db.query("ALTER TABLE master_bookcounts DISABLE KEYS")
177196
print "loading data using LOAD DATA LOCAL INFILE"
178197
for filename in os.listdir("files/texts/encoded/unigrams"):
179-
if filename[-4:] != '.txt':
198+
if not filename.endswith('.txt'):
199+
# Sometimes other files are in there; skip them.
180200
continue
181201
try:
182202
db.query("LOAD DATA LOCAL INFILE 'files/texts/encoded/unigrams/"+filename+"' INTO TABLE master_bookcounts CHARACTER SET utf8 (bookid,wordid,count);")
@@ -236,7 +256,8 @@ def loadVariableDescriptionsIntoDatabase(self):
236256

237257
def reloadMemoryTables(self,force=False):
238258
"""
239-
Checks to see if memory tables need to be repopulated, and then does so if they are empty.
259+
Checks to see if memory tables need to be repopulated (by seeing if they are empty)
260+
and then does so if necessary.
240261
"""
241262
existingCreateCodes = self.db.query("SELECT tablename,memoryCode FROM masterTableTable").fetchall();
242263
for row in existingCreateCodes:
@@ -339,6 +360,9 @@ def create_API_settings(self):
339360
db.query("INSERT INTO API_settings VALUES ('%s');" % addCode)
340361

341362
def update_Porter_stemming(self): #We use stems occasionally.
363+
"""
364+
Still not executed.
365+
"""
342366
print "Updating stems from Porter algorithm..."
343367
from nltk import PorterStemmer
344368
stemmer = PorterStemmer()
@@ -354,8 +378,7 @@ def update_Porter_stemming(self): #We use stems occasionally.
354378

355379
def addCategoricalFromFile(self,filename,unique=False):
356380
"""
357-
Useful, but still a bit of a hack--should be a special method of adding a group
358-
that automatically creates the json file.
381+
No longer used: delete this code block.
359382
"""
360383
file = open(filename)
361384
firstTwo = file.readline().split("\t")
@@ -364,7 +387,7 @@ def addCategoricalFromFile(self,filename,unique=False):
364387
definition = {"field":name,"datatype":"categorical","type":"character","unique":False}
365388

366389
#Currently the anchortype has to be a MediumInt.
367-
#That's extremely inefficient.
390+
#That's a little inefficient if joining on a smaller document.
368391
anchorType = "MEDIUMINT"
369392

370393
thisField = dataField(definition,

0 commit comments

Comments
 (0)