Lazy loading of models; fix production setup

eaudeweb · Jul 11, 2019 · 7ce8d34 · 7ce8d34
1 parent 82fff41
commit 7ce8d34
Show file tree

Hide file tree

Showing 4 changed files with 29 additions and 16 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -31,6 +31,7 @@ RUN pip3 install -e /app
 RUN python -m nltk.downloader -d /data/nltk_data/ stopwords
 RUN python -m nltk.downloader -d /data/nltk_data/ punkt
 RUN python -m nltk.downloader -d /data/nltk_data/ wordnet
+RUN python -m nltk.downloader -d /data/nltk_data/ averaged_perceptron_tagger
 
 EXPOSE 6543
 WORKDIR /app

diff --git a/nlpservice/__init__.py b/nlpservice/__init__.py
@@ -21,7 +21,10 @@ def prepare_model_loaders(config, prefix, model_loader):
 
     def config_wrapper(factory):
         def inner():
-            return factory(config)
+            try:  # best-effort: a failing factory is logged and inner() returns None
+                return factory(config)
+            except Exception:  # not bare: let SystemExit/KeyboardInterrupt propagate
+                logger.exception("Could not run model factory %r", factory)
 
         return inner
 

diff --git a/nlpservice/nlp/classify.py b/nlpservice/nlp/classify.py
@@ -441,21 +441,29 @@ def kg_classifier_keras(config):
     kg_url = settings['nlp.kg_url']
     kg_elastic = settings['nlp.kg_elastic']
 
-    kg = get_lemmatized_kg(kg_url)
-    labels = list(sorted(kg.keys()))
+    corpus_path = settings['nlp.kg_corpus']
 
-    session = nongpu_session()
+    loaded = []
 
-    with session.as_default():
-        model = load_model(model_path)
+    def load():  # fills `loaded` with [model, vocab, label_encoder]; called by predict() when empty
+        kg = get_lemmatized_kg(kg_url)
+        labels = list(sorted(kg.keys()))
+        session = nongpu_session()
 
-    kv_model = FastText.load(ft_model_path)
-    vocab = kv_model.wv.index2word
-    label_encoder = make_labelencoder(labels)
+        with session.as_default():
+            model = load_model(model_path)
 
-    corpus_path = settings['nlp.kg_corpus']
+        kv_model = FastText.load(ft_model_path)
+        vocab = kv_model.wv.index2word
+        label_encoder = make_labelencoder(labels)
+
+        loaded.extend([model, vocab, label_encoder])  # list.extend() takes ONE iterable
 
     def predict(text):
+        if not loaded:
+            load()
+
+        model, vocab, label_encoder = loaded
         maxlen = model.inputs[0].get_shape()[1].value
 
         k = _predict(text, model, label_encoder, vocab, maxlen)

diff --git a/production.ini b/production.ini
@@ -9,8 +9,13 @@ pyramid.debug_templates = false
 pyramid.default_locale_name = en
 
 nlp.tf_model_cache_path = /data/model_cache
-;
-; nlp.classifiers.kg = nlpservice.nlp.classify.kg_classify_settings
+
+# Save the downloaded corpus text to this file.
+# Used by both defined classifiers when retraining.
+nlp.kg_corpus = /data/nlp/corpus.txt
+
+# default index with content, used to retrain the classifier models
+nlp.kg_elastic = http://elasticsearch:9200/content
 
 # classifier using a fasttext built model
 nlp.classifier.kg-fasttext = nlpservice.nlp.classify.kg_classifier_fasttext
@@ -27,10 +32,6 @@ nlp.kg_kv_path = /data/nlp/corpus-ft
 # needed to build the labels
 nlp.kg_url = http://nginx/api/knowledge-graph/dump_all/
 
-; nlp.kg_model_path = /app/nlpservice/tests/fixtures/k-model.hdf
-; nlp.kg_ft_path = /app/nlpservice/tests/fixtures/corpus-ft
-; nlp.kg_url = http://nginx/api/knowledge-graph/dump_all/
-
 nlp.keyedvectors.corpus-ft = nlpservice.nlp.fasttext.corpus_kv_settings
 
 nlp.cache = /data/cache