From 7ce8d343a3604f116bfbc594bf41843ba717458f Mon Sep 17 00:00:00 2001
From: Tiberiu Ichim
Date: Thu, 11 Jul 2019 17:43:47 +0300
Subject: [PATCH] Lazy loading of models; fix production setup

Defer loading of the Keras model, the FastText vocabulary and the label
encoder in kg_classifier_keras until the first predict() call. Log
exceptions raised by a model factory instead of propagating them (the
wrapped factory then returns None). Download the NLTK
averaged_perceptron_tagger data in the Docker image, and define
nlp.kg_corpus and nlp.kg_elastic in production.ini.

---
Review notes (text in this section is ignored by `git am`):

* This copy of the patch had its line breaks and indentation collapsed.
  Line structure was restored; every hunk's line counts match its @@
  header and the diffstat below. Indentation and the position of blank
  context lines are a best-effort reconstruction - TODO confirm against
  the target tree before applying (or apply with --ignore-whitespace).
* classify.py: `loaded.extend(model, vocab, label_encoder)` was changed
  to pass a single list. list.extend() accepts exactly one iterable, so
  the original call raised TypeError on the first predict().
* __init__.py: bare `except:` was narrowed to `except Exception:` so
  that SystemExit / KeyboardInterrupt are not swallowed. The
  log-and-return-None behaviour is unchanged.
* Because two added lines differ from the original commit, the
  post-image blob ids on the `index` lines no longer describe the
  result of applying this patch.

 Dockerfile                 |  1 +
 nlpservice/__init__.py     |  5 ++++-
 nlpservice/nlp/classify.py | 26 +++++++++++++++++---------
 production.ini             | 13 +++++++------
 4 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index e96750b..5457566 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -31,6 +31,7 @@ RUN pip3 install -e /app
 RUN python -m nltk.downloader -d /data/nltk_data/ stopwords
 RUN python -m nltk.downloader -d /data/nltk_data/ punkt
 RUN python -m nltk.downloader -d /data/nltk_data/ wordnet
+RUN python -m nltk.downloader -d /data/nltk_data/ averaged_perceptron_tagger
 
 EXPOSE 6543
 WORKDIR /app
diff --git a/nlpservice/__init__.py b/nlpservice/__init__.py
index d7a1c41..d307de2 100755
--- a/nlpservice/__init__.py
+++ b/nlpservice/__init__.py
@@ -21,7 +21,10 @@ def prepare_model_loaders(config, prefix, model_loader):
 
     def config_wrapper(factory):
         def inner():
-            return factory(config)
+            try:
+                return factory(config)
+            except Exception:
+                logger.exception("Could not run model factory %r", factory)
 
         return inner
 
diff --git a/nlpservice/nlp/classify.py b/nlpservice/nlp/classify.py
index 874186d..1134c73 100644
--- a/nlpservice/nlp/classify.py
+++ b/nlpservice/nlp/classify.py
@@ -441,21 +441,29 @@ def kg_classifier_keras(config):
     kg_url = settings['nlp.kg_url']
     kg_elastic = settings['nlp.kg_elastic']
 
-    kg = get_lemmatized_kg(kg_url)
-    labels = list(sorted(kg.keys()))
+    corpus_path = settings['nlp.kg_corpus']
 
-    session = nongpu_session()
+    loaded = []
 
-    with session.as_default():
-        model = load_model(model_path)
+    def load():
+        kg = get_lemmatized_kg(kg_url)
+        labels = list(sorted(kg.keys()))
+        session = nongpu_session()
 
-    kv_model = FastText.load(ft_model_path)
-    vocab = kv_model.wv.index2word
-    label_encoder = make_labelencoder(labels)
+        with session.as_default():
+            model = load_model(model_path)
 
-    corpus_path = settings['nlp.kg_corpus']
+        kv_model = FastText.load(ft_model_path)
+        vocab = kv_model.wv.index2word
+        label_encoder = make_labelencoder(labels)
+
+        loaded.extend([model, vocab, label_encoder])
 
     def predict(text):
+        if not loaded:
+            load()
+
+        model, vocab, label_encoder = loaded
         maxlen = model.inputs[0].get_shape()[1].value
 
         k = _predict(text, model, label_encoder, vocab, maxlen)
diff --git a/production.ini b/production.ini
index a62c044..d447caf 100755
--- a/production.ini
+++ b/production.ini
@@ -9,8 +9,13 @@ pyramid.debug_templates = false
 pyramid.default_locale_name = en
 
 nlp.tf_model_cache_path = /data/model_cache
- ;
- ; nlp.classifiers.kg = nlpservice.nlp.classify.kg_classify_settings
+
+# save downloaded corpus text to this file;
+# Used by both defined classifiers when retraining
+nlp.kg_corpus = /data/nlp/corpus.txt
+
+# default index with content, used to retrain the classifier models
+nlp.kg_elastic = http://elasticsearch:9200/content
 
 # classifier using a fasttext built model
 nlp.classifier.kg-fasttext = nlpservice.nlp.classify.kg_classifier_fasttext
@@ -27,10 +32,6 @@ nlp.kg_kv_path = /data/nlp/corpus-ft
 
 # needed to build the labels
 nlp.kg_url = http://nginx/api/knowledge-graph/dump_all/
-; nlp.kg_model_path = /app/nlpservice/tests/fixtures/k-model.hdf
-; nlp.kg_ft_path = /app/nlpservice/tests/fixtures/corpus-ft
-; nlp.kg_url = http://nginx/api/knowledge-graph/dump_all/
-
 nlp.keyedvectors.corpus-ft = nlpservice.nlp.fasttext.corpus_kv_settings
 
 nlp.cache = /data/cache