Skip to content

Commit

Permalink
Lazy loading of models; fix production setup
Browse files Browse the repository at this point in the history
  • Loading branch information
tiberiuichim committed Jul 11, 2019
1 parent 82fff41 commit 7ce8d34
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 16 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ RUN pip3 install -e /app
RUN python -m nltk.downloader -d /data/nltk_data/ stopwords
RUN python -m nltk.downloader -d /data/nltk_data/ punkt
RUN python -m nltk.downloader -d /data/nltk_data/ wordnet
RUN python -m nltk.downloader -d /data/nltk_data/ averaged_perceptron_tagger

EXPOSE 6543
WORKDIR /app
Expand Down
5 changes: 4 additions & 1 deletion nlpservice/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ def prepare_model_loaders(config, prefix, model_loader):

def config_wrapper(factory):
def inner():
return factory(config)
try:
return factory(config)
except:
logger.exception("Could not run model factory %r", factory)

return inner

Expand Down
26 changes: 17 additions & 9 deletions nlpservice/nlp/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,21 +441,29 @@ def kg_classifier_keras(config):
kg_url = settings['nlp.kg_url']
kg_elastic = settings['nlp.kg_elastic']

kg = get_lemmatized_kg(kg_url)
labels = list(sorted(kg.keys()))
corpus_path = settings['nlp.kg_corpus']

session = nongpu_session()
loaded = []

with session.as_default():
model = load_model(model_path)
def load():
kg = get_lemmatized_kg(kg_url)
labels = list(sorted(kg.keys()))
session = nongpu_session()

kv_model = FastText.load(ft_model_path)
vocab = kv_model.wv.index2word
label_encoder = make_labelencoder(labels)
with session.as_default():
model = load_model(model_path)

corpus_path = settings['nlp.kg_corpus']
kv_model = FastText.load(ft_model_path)
vocab = kv_model.wv.index2word
label_encoder = make_labelencoder(labels)

loaded.extend(model, vocab, label_encoder)

def predict(text):
if not loaded:
load()

model, vocab, label_encoder = loaded
maxlen = model.inputs[0].get_shape()[1].value

k = _predict(text, model, label_encoder, vocab, maxlen)
Expand Down
13 changes: 7 additions & 6 deletions production.ini
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,13 @@ pyramid.debug_templates = false
pyramid.default_locale_name = en

nlp.tf_model_cache_path = /data/model_cache
;
; nlp.classifiers.kg = nlpservice.nlp.classify.kg_classify_settings

# Path where the downloaded corpus text is saved;
# used by both defined classifiers when retraining.
nlp.kg_corpus = /data/nlp/corpus.txt

# Default Elasticsearch index holding the content used to retrain the classifier models
nlp.kg_elastic = http://elasticsearch:9200/content

# classifier using a fasttext built model
nlp.classifier.kg-fasttext = nlpservice.nlp.classify.kg_classifier_fasttext
Expand All @@ -27,10 +32,6 @@ nlp.kg_kv_path = /data/nlp/corpus-ft
# needed to build the labels
nlp.kg_url = http://nginx/api/knowledge-graph/dump_all/

; nlp.kg_model_path = /app/nlpservice/tests/fixtures/k-model.hdf
; nlp.kg_ft_path = /app/nlpservice/tests/fixtures/corpus-ft
; nlp.kg_url = http://nginx/api/knowledge-graph/dump_all/

nlp.keyedvectors.corpus-ft = nlpservice.nlp.fasttext.corpus_kv_settings

nlp.cache = /data/cache
Expand Down

0 comments on commit 7ce8d34

Please sign in to comment.