%load_ext autoreload
%autoreload 2from src.textlabelling.dataPrep import DataPrep
from src.textlabelling.labelling import Labelling
from src.textlabelling.nerstats import NerStats
from src.textlabelling.csvmodel import CSVModel
from src.textlabelling.tainner import TrainNer, save_model, evaluate
from src.textlabelling.dbconnect import Connect
from src.textlabelling.model import Model
import random
random.seed(0)!pwdTRAIN_DATA=[]Data_prep = DataPrep('xxxxxxxx.csv')
train, test = Data_prep.split_data()Data_prep.filename!head -n 6 xxxxxxxx.csvtrainset = DataPrep.text_generator(train)
# Number of texts labelled so far (incremented once per main() call).
number = 0
# Running total of the per-text counters returned by main().
n_counter = 0


# %%
def main():
    """Label the next training text and record the result.

    Pulls the next text from the module-level ``trainset`` iterator,
    lower-cases it, prints it together with every item yielded by
    ``Labelling.token_to_tuple()``, and appends the result of
    ``Labelling.text_entities_construct()`` to the module-level
    ``TRAIN_DATA`` list.

    Returns:
        The counter returned by ``text_entities_construct()`` for this text.

    Raises:
        StopIteration: if ``trainset`` has no texts left.
    """
    text = next(trainset).lower()
    print('')
    print(text)
    print('')
    # Use the Labelling class imported at the top of the file; no module
    # named ``nerlabelling`` is imported anywhere in this file.
    label = Labelling(text)
    for item in label.token_to_tuple():
        print(item)
    result, counter = label.text_entities_construct()
    TRAIN_DATA.append(result)
    return counter


# %%
counter = main()
# Update the labelling progress after the main() call above.
number += 1
n_counter += counter
# NOTE(review): `display` is not imported in the visible part of this file --
# presumably `from IPython import display`; confirm it is in scope.
# The HTML object is rendered only when it is the last expression of the cell.
display.HTML(
    "<table><tr><th><H1><center>Counter:</center></H1></th></tr>"
    "<tr><th>Global</th><th>Internal</th></tr>"
    "<tr><td><H1>{}</H1></td><td><H1>{}</H1></td></tr></table>".format(
        number, n_counter
    )
)

# %%
Stats = NerStats()

# %%
Stats.save_labelled_data(data=TRAIN_DATA, file_name='labelled_data/labelleddata.pkl')

# %%
# NOTE(review): this loads 'labelleddat.pkl' while the cell above saves
# 'labelleddata.pkl' -- possibly a typo; confirm which file is intended.
TRAIN_DATA2 = Stats.load_labelled_data('labelled_data/labelleddat.pkl')

# %%
data = Stats.data_distribution(TRAIN_DATA2)

# %%
# Visualise only after `data` has been computed, so a top-to-bottom run does
# not hit an undefined name.
Stats.distribution_visualizer(data)
data.items()

# %% [markdown]
# #### v. Fix Training data

# %%
# Print the single labelled example at index 1236 for manual inspection.
# Earlier variants of this cell filtered on the 'payments' label inside
# value[1]['entities'] instead of on the index.
for idx, value in enumerate(TRAIN_DATA2):
    if idx == 1236:
        print(idx, value)

# %%
# Manually overwrite the labelled example at index 114 with corrected
# entity spans.
TRAIN_DATA2[114] = (
    'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
    {'entities': [(0, 29, 'Payments'), (116, 128, 'Payments')]},
)

# %%
# Name under which the trained model is saved further down.
model = 'model_vi'
n_accuracy = {}

# %%
# Train the NER model on the corrected labelled data.
Trainer = TrainNer(TRAIN_DATA2, n_iter=150)
nlp, losses = Trainer.train_model()
# Disabled: would store 100 - losses['ner'] under the model name.
#n_accuracy[model]=100-losses['ner']

# %%
type(nlp)

# %%
# NOTE(review): `save_model` is also imported as a free function from
# src.textlabelling.tainner -- confirm NerStats.save_model is the intended call.
NerStats.save_model(nlp, model)

# %%
n_accuracy

# %%
TEST_DATA = Stats.load_labelled_data('labelled_data/labelledtest.pkl')

# %%
evaluate(nlp, TEST_DATA)

# %% [markdown]
# ### 4.

# %%
# Query template handed to Model; the stray trailing quote after the table
# name has been removed so the statement is well-formed SQL.
template = """
select created_date, reference_ticket, nps_verbatim,nps_score from table
"""

# %%
# NOTE(review): this rebinds the imported class name `Model` to an instance,
# so re-running this cell in the same session will fail. The name is kept
# because later cells use it. Also confirm whether 'model_v1' here is meant
# to match the 'model_vi' name the trained model was saved under.
Model = Model('model_v1', template, 'config/config.ini', 'Redshift_prod')

# %%
Model.create_table('public.nps_table')

# %%
Model.template = template

# %%
Model.insert_to_redshift()