Skip to content

Commit

Permalink
Merge branch 'master' of github.com:eaudeweb/NLPService
Browse files Browse the repository at this point in the history
  • Loading branch information
tiberiuichim committed Jul 10, 2019
2 parents 9374256 + 9d5e7ae commit 82fff41
Show file tree
Hide file tree
Showing 11 changed files with 213 additions and 97 deletions.
47 changes: 40 additions & 7 deletions nlpservice/nlp/classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
from tensorflow.keras.utils import to_categorical
from textacy.preprocess import normalize_whitespace

from .fasttext import main as train_kv
from .models import get_model, gpu_session, nongpu_session
from .prepare import main as prepare_text
from .prepare import text_tokenize
from .utils import get_lemmatized_kg

Expand Down Expand Up @@ -202,9 +204,9 @@ def get_doc_labels(doc, kg):


def read_corpus(path):
""" Returns a list of documents. A doc is a list of sentences.
""" Takes a text file and returns a list of documents.
A sentence is space separated tokens (words)
A doc is a list of sentences. A sentence is space separated tokens (words)
"""

logger.info('Loading corpus')
Expand Down Expand Up @@ -269,9 +271,6 @@ def main(output, ftpath, corpus, kg_url, cpu):
ft_model = FastText.load(ftpath)
kg = get_lemmatized_kg(kg_url)

# import pdb
# pdb.set_trace()

if cpu:
sess = nongpu_session()
else:
Expand Down Expand Up @@ -419,9 +418,14 @@ def predict(text):

return list(pairs)

def train():
    """ Placeholder for retraining: this model has no training pipeline.

    Raises:
        NotImplementedError: always.
    """
    # The bare ``return`` that used to follow the raise was unreachable.
    raise NotImplementedError('train() is not implemented for this model')

return {
'predict': predict,
'train': lambda: None,
'train': train,
'metadata': {},
}

Expand All @@ -435,6 +439,7 @@ def kg_classifier_keras(config):
model_path = settings['nlp.kg_model_path']
ft_model_path = settings['nlp.kg_kv_path']
kg_url = settings['nlp.kg_url']
kg_elastic = settings['nlp.kg_elastic']

kg = get_lemmatized_kg(kg_url)
labels = list(sorted(kg.keys()))
Expand All @@ -448,6 +453,8 @@ def kg_classifier_keras(config):
vocab = kv_model.wv.index2word
label_encoder = make_labelencoder(labels)

corpus_path = settings['nlp.kg_corpus']

def predict(text):
maxlen = model.inputs[0].get_shape()[1].value

Expand All @@ -458,8 +465,34 @@ def predict(text):

return list(pairs)

def train():
    """ Rebuild the corpus text, the kv model and the Keras classifier.

    The three steps are invoked through the ``.callback`` attribute of the
    imported commands, so they are called as plain functions with the
    paths/urls read from the settings. Returns whatever the classifier
    training step returns.
    """
    # pipeline is: get text from elastic, prepare kv model, train on text

    logger.warning('Preparing corpus text')
    prepare_text.callback(corpus_path, kg_elastic, None)

    logger.warning('Preparing kv model')
    train_kv.callback(corpus_path, ft_model_path)

    logger.warning('Training Keras classifier')

    # the trailing False is main()'s ``cpu`` argument
    return main.callback(
        model_path, ft_model_path, corpus_path, kg_url, False,
    )

return {
'predict': predict,
'metadata': {},
'train': lambda: None,
'train': train,
}
#
#
# @click.command()
# @click.argument('model', nargs=-1, required=True)
# def retrain(model):
# # TODO: we can't properly get models without an .ini file
#
# for name in model:
# suite = get_model(name)
# train = suite['train']
# logger.warning("Retraining %s", name)
# train()
59 changes: 21 additions & 38 deletions nlpservice/nlp/fasttext.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
"""

import logging
from pathlib import Path

import click
from tqdm import tqdm
Expand All @@ -22,43 +21,6 @@ def trim(word, count, min_count):
return RULE_DEFAULT


# count = 0
#
#
# def counter(it):
# global count
#
# for line in it:
# line = line.strip()
#
# if not line:
# continue
#
# count += 1
# yield line.split(' ')
#

@click.command()
@click.argument('textfile')
@click.argument('output')
def main(textfile, output):
    """ A script to generate FastText-based word embeddings

    TEXTFILE is read as one sentence per line, each sentence being space
    separated tokens. The trained model is saved to OUTPUT.
    """

    logger.setLevel(logging.WARNING)
    model = FastText(size=100, window=3, sg=True, min_count=5,
                     seed=0, word_ngrams=True, trim_rule=trim)

    # Blank lines are skipped: they would otherwise turn into [''] sentences
    # that feed an empty token to the model and inflate total_examples.
    with open(textfile) as f:
        sentences = [
            line.split(' ')
            for line in (raw.strip() for raw in f)
            if line
        ]

    # Kept as a list on purpose: the sentences are iterated twice and their
    # count is needed by train().
    model.build_vocab(sentences=tqdm(sentences))
    model.train(sentences=tqdm(sentences),
                epochs=10, total_examples=len(sentences), workers=7)

    model.save(output)


def similar_by_word(word, model):
ft = get_model(model)
wv = ft['model'].wv
Expand Down Expand Up @@ -89,3 +51,24 @@ def corpus_kv_settings(config):
settings = config.get_settings()

return settings['nlp.kg_kv_path']


@click.command()
@click.argument('textfile')
@click.argument('output')
def main(textfile, output):
    """ A script to generate FastText-based word embeddings

    TEXTFILE is read as one sentence per line, each sentence being space
    separated tokens. The trained model is saved to OUTPUT.
    """

    logger.setLevel(logging.WARNING)
    model = FastText(size=100, window=3, sg=True, min_count=5,
                     seed=0, word_ngrams=True, trim_rule=trim)

    # Blank lines are skipped: they would otherwise turn into [''] sentences
    # that feed an empty token to the model and inflate total_examples.
    with open(textfile) as f:
        sentences = [
            line.split(' ')
            for line in (raw.strip() for raw in f)
            if line
        ]

    # Kept as a list on purpose: the sentences are iterated twice and their
    # count is needed by train().
    model.build_vocab(sentences=tqdm(sentences))
    model.train(sentences=tqdm(sentences),
                epochs=10, total_examples=len(sentences), workers=7)

    model.save(output)
1 change: 1 addition & 0 deletions nlpservice/nlp/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path

import click

import ftfy
import syntok.segmenter as segmenter
import textacy as tc
Expand Down
3 changes: 3 additions & 0 deletions nlpservice/nlpservice-ui/src/App.vue
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
<v-btn flat>
<router-link to="/classify">Classify</router-link>
</v-btn>
<v-btn flat>
<router-link to="/classify/retrain">Retrain</router-link>
</v-btn>

</v-toolbar>

Expand Down
11 changes: 4 additions & 7 deletions nlpservice/nlpservice-ui/src/components/Classify.vue
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<v-container grid-list-md>
<v-layout row wrap>
<v-flex xs12 md12>
<h2 class="headline font-weight-bold mb-3">Similarity</h2>
<h2 class="headline font-weight-bold mb-3">Classify</h2>
</v-flex>

<v-flex xs12 md6>
Expand All @@ -15,7 +15,7 @@

<v-flex xs12 md6>
<v-sheet color="purple lighten-3" elevation="1" min-height="10em">
<div v-for="s in scores">
<div v-for="s in scores" :key="s">
{{ s[0] }} - {{ s[1] }}
</div>
</v-sheet>
Expand Down Expand Up @@ -48,23 +48,20 @@
},
submit() {
axios
.post('http://localhost:6543/classify',
.post('http://localhost:6543/classify/' + this.model,
{
'text': this.text,
'model': this.model,
})
.then((resp) => {
this.scores = resp.data.result
console.log(resp)
})
}
},
mounted() {
axios
.get('http://localhost:6543/list-classifiers')
.get('http://localhost:6543/classify/')
.then((resp) => {
this.models = resp.data.result
console.log(resp)
})
}
}
Expand Down
3 changes: 1 addition & 2 deletions nlpservice/nlpservice-ui/src/components/KeyedVectors.vue
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

<v-flex xs12 md6>
<v-sheet color="purple lighten-3" elevation="1" min-height="10em">
<div v-for="s in scores" key="s">
<div v-for="s in scores" :key="s">
{{ s[0] }} - {{ s[1] }}
</div>
</v-sheet>
Expand Down Expand Up @@ -49,7 +49,6 @@
})
.then((resp) => {
this.scores = resp.data.result
console.log(resp)
})
}
}
Expand Down
59 changes: 59 additions & 0 deletions nlpservice/nlpservice-ui/src/components/Retrain.vue
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
<template>
  <v-container grid-list-md>
    <v-layout row wrap>
      <v-flex xs12 md12>
        <h2 class="headline font-weight-bold mb-3">Retrain</h2>
      </v-flex>

      <v-flex xs12 md6>
        <v-sheet color="green lighten-3" elevation="1" min-height="10em">
          <h4>Set models for retraining</h4>
          <v-select multiple :items="models" label="Model" @change="setModel"></v-select>
        </v-sheet>
      </v-flex>

      <v-flex xs12 md12>
        <v-btn primary @click="submit">Submit</v-btn>
      </v-flex>

    </v-layout>
  </v-container>
</template>
<script>
  import axios from 'axios'

  // Lets the user pick one or more models and posts the selection to the
  // service for retraining.
  export default {
    components: {
    },
    data () {
      return {
        // options offered by the select, filled in by mounted()
        'models': [],
        // current selection, kept in sync by setModel()
        'retrain': [],
        // Declared here so the value assigned in submit() is reactive.
        // The shape of the response is not visible from this component,
        // hence the neutral null default.
        'scores': null
      }
    },
    methods: {
      // @change handler of the select: remembers the selected models
      setModel(value) {
        this.retrain = value
      },
      // Sends the selected models to the service and stores its `result`
      submit() {
        axios
          .post('http://localhost:6543/classify/',
            {
              'models': this.retrain,
            })
          .then((resp) => {
            this.scores = resp.data.result
          })
      }
    },
    mounted() {
      // Populate the select with the `result` of the GET request
      axios
        .get('http://localhost:6543/classify/')
        .then((resp) => {
          this.models = resp.data.result
        })
    }
  }
</script>
<style>
</style>
15 changes: 10 additions & 5 deletions nlpservice/nlpservice-ui/src/router/routes.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import Start from '../components/Start'
import Summarize from '../components/summarize/Main'
import Duplicate from '../components/duplicate/Main'
import Similarity from '../components/Similarity'
import KeyedVectors from '../components/KeyedVectors'
import Classify from '../components/Classify'
import Clusterize from '../components/cluster/Main'
import Duplicate from '../components/duplicate/Main'
import KeyedVectors from '../components/KeyedVectors'
import Retrain from '../components/Retrain'
import Similarity from '../components/Similarity'
import Summarize from '../components/summarize/Main'
import Start from '../components/Start'

const routes = [
{
Expand Down Expand Up @@ -35,6 +36,10 @@ const routes = [
path: '/classify',
component: Classify,
},
{
path: '/classify/retrain',
component: Retrain,
},
]

export default routes
Loading

0 comments on commit 82fff41

Please sign in to comment.