Skip to content

Commit

Permalink
#65 add new abbreviations and retrain models
Browse files Browse the repository at this point in the history
  • Loading branch information
atarchetti committed Feb 8, 2023
1 parent b40eae5 commit 975cc18
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 25 deletions.
4 changes: 3 additions & 1 deletion data/dicts/mexico_abbreviations.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ AV,AVENUE
NO,NUMBER
NUM,NUMBER
PO,POST OFFICE
P.O,POST OFFICE
BLVD,BOULEVARD
LT,LOTE
MZ,MANZANA
CDMX,Ciudad de México
DF,Distrito Federal
Binary file not shown.
Binary file not shown.
9 changes: 4 additions & 5 deletions examples/MEXICO_predict_example_no_embeddings copy.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@
"outputs": [],
"source": [
"file_path = '../data/tmp/AI_LATA_ADDRESS_MEX_modificado.csv'\n",
"# df = pd.read_csv(file_path, sep='¨', nrows=None, engine='python')\n",
"df = pd.read_csv(file_path, sep='¨', nrows=5000, engine='python')"
"df = pd.read_csv(file_path, sep='¨', nrows=None, engine='python')"
]
},
{
Expand Down Expand Up @@ -176,8 +175,8 @@
{
"data": {
"text/plain": [
"0.0 4984\n",
"1.0 16\n",
"0.0 2518008\n",
"1.0 2288\n",
"Name: prediction_is_key_smash, dtype: int64"
]
},
Expand All @@ -201,7 +200,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand Down
20 changes: 10 additions & 10 deletions examples/MEXICO_retrain_predict_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,8 @@
{
"data": {
"text/plain": [
"valid 1344182\n",
"key_smash 661\n",
"valid 1344254\n",
"key_smash 657\n",
"contains_email 569\n",
"contains_exactly_the_word_test 177\n",
"only_special_characters 144\n",
Expand Down Expand Up @@ -312,9 +312,9 @@
{
"data": {
"text/plain": [
"valid 2511507\n",
"valid 2511527\n",
"contains_context_invalid_words 3079\n",
"key_smash 1514\n",
"key_smash 1494\n",
"only_special_characters 1291\n",
"contains_email 1048\n",
"contains_exactly_the_word_test 667\n",
Expand Down Expand Up @@ -357,10 +357,10 @@
"\u001b[33mtranning model...\u001b[37m\n",
"\u001b[32mdone\u001b[37m\n",
"\u001b[33mget model score...\u001b[37m\n",
"\u001b[1maccuracy -> \u001b[22m0.9910256410256411\n",
"\u001b[1mprecision -> \u001b[22m0.9776536312849162\n",
"\u001b[1mrecall -> \u001b[22m0.9831460674157303\n",
"\u001b[1mf1 -> \u001b[22m0.9803921568627452\n"
"\u001b[1maccuracy -> \u001b[22m0.9922879177377892\n",
"\u001b[1mprecision -> \u001b[22m0.9903846153846154\n",
"\u001b[1mrecall -> \u001b[22m0.9809523809523809\n",
"\u001b[1mf1 -> \u001b[22m0.985645933014354\n"
]
}
],
Expand Down Expand Up @@ -391,8 +391,8 @@
{
"data": {
"text/plain": [
"0.0 1343185\n",
"1.0 2936\n",
"0.0 1342240\n",
"1.0 3949\n",
"Name: prediction, dtype: int64"
]
},
Expand Down
18 changes: 9 additions & 9 deletions examples/MEXICO_retrain_predict_example_no_embedding.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,8 @@
{
"data": {
"text/plain": [
"valid 1344182\n",
"key_smash 661\n",
"valid 1344254\n",
"key_smash 657\n",
"contains_email 569\n",
"contains_exactly_the_word_test 177\n",
"only_special_characters 144\n",
Expand Down Expand Up @@ -289,9 +289,9 @@
{
"data": {
"text/plain": [
"valid 2511507\n",
"valid 2511527\n",
"contains_context_invalid_words 3079\n",
"key_smash 1514\n",
"key_smash 1494\n",
"only_special_characters 1291\n",
"contains_email 1048\n",
"contains_exactly_the_word_test 667\n",
Expand Down Expand Up @@ -334,10 +334,10 @@
"\u001b[33mtranning model...\u001b[37m\n",
"\u001b[32mdone\u001b[37m\n",
"\u001b[33mget model score...\u001b[37m\n",
"\u001b[1maccuracy -> \u001b[22m1.0\n",
"\u001b[1mprecision -> \u001b[22m1.0\n",
"\u001b[1maccuracy -> \u001b[22m0.9987146529562982\n",
"\u001b[1mprecision -> \u001b[22m0.9946524064171123\n",
"\u001b[1mrecall -> \u001b[22m1.0\n",
"\u001b[1mf1 -> \u001b[22m1.0\n"
"\u001b[1mf1 -> \u001b[22m0.9973190348525469\n"
]
}
],
Expand Down Expand Up @@ -368,8 +368,8 @@
{
"data": {
"text/plain": [
"0.0 1345148\n",
"1.0 973\n",
"0.0 1345413\n",
"1.0 776\n",
"Name: prediction, dtype: int64"
]
},
Expand Down

0 comments on commit 975cc18

Please sign in to comment.