Skip to content

Commit

Permalink
Added parameter to store removed prepositions. Fixed an issue with linebreaks.
Browse files Browse the repository at this point in the history
  • Loading branch information
mille-s committed Feb 22, 2023
1 parent bafe975 commit 8223a6a
Showing 1 changed file with 24 additions and 18 deletions.
42 changes: 24 additions & 18 deletions UD_Converter_release.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"# Download resources for UD to PredArg conversion"
],
"metadata": {
"id": "dHQBcKE6CCIC"
"id": "clb0W-W2e9x0"
}
},
{
Expand All @@ -58,7 +58,7 @@
"# Install Stanza for UD parsing"
],
"metadata": {
"id": "3bvg9S78CJ7q"
"id": "no-bubGqfBMD"
}
},
{
Expand All @@ -77,7 +77,7 @@
" os.makedirs(output_folder)"
],
"metadata": {
"id": "VnFqJeByB7om"
"id": "2IBkOS3ue89k"
},
"execution_count": null,
"outputs": []
Expand All @@ -88,7 +88,7 @@
"# Run Stanza (upload text file first)"
],
"metadata": {
"id": "BiEFSiDSPeX8"
"id": "BjzmcIp9fK0c"
}
},
{
Expand Down Expand Up @@ -142,7 +142,7 @@
" #print('\\n')\n",
" for sent in doc.sentences:\n",
" #UD_sentence = ''\n",
" UD_sentence = '# text = '+doc.text\n",
" UD_sentence = '# text = '+doc.text+'\\n'\n",
" for word in sent.words:\n",
" columns = []\n",
" columns.append(str(word.id)) #(0)\n",
Expand All @@ -167,11 +167,11 @@
" with open(os.path.join(output_folder, language+'_'+filename+'_STZ.conllu'), \"w\", encoding=\"utf-8\") as f:\n",
" f.write(UD_text)\n",
"\n",
" copy_files(output_folder, '/content/UD_Converter/inputs')\n"
" copy_files(output_folder, '/content/UD_Converter/inputs')"
],
"metadata": {
"cellView": "form",
"id": "xKy5hZEVCBJW"
"id": "NtespU2ufO7L"
},
"execution_count": null,
"outputs": []
Expand All @@ -182,7 +182,7 @@
"# Run conversion to Predicate-argument structures"
],
"metadata": {
"id": "O3KjrNHdOpfh"
"id": "S-Lba9RkfSoz"
}
},
{
Expand Down Expand Up @@ -227,6 +227,8 @@
"strPerFile = '10000'\n",
"# perform structure well-formedness and file alignment checks and create debug files ('yes'/'no')\n",
"debug = 'yes'\n",
"# keep the intermediate files produced by the different components ('yes'/'no'; 'no' deletes the tmpIn and tmpOut folders)\n",
"keep_intermediate_files = 'no'\n",
"\n",
"#============================================================================================================\n",
"# CONVERSION PARAMETERS ('yes'/'no')\n",
Expand Down Expand Up @@ -265,6 +267,8 @@
"parentheses = default\n",
"# keep quotation marks in the deep structure (SRST: 'no')\n",
"quotationMarks = default\n",
"# keep the label of each adposition in the deep structure, on the node of the word it was attached to in UD (SRST: 'no')\n",
"adposition = 'yes'\n",
"\n",
"#============================================================================================================\n",
"# DO NOT EDIT BELOW\n",
Expand Down Expand Up @@ -365,7 +369,7 @@
" originalID = 'no'\n",
"convertFolder = tmpIn\n",
"path_conllu2conll = os.path.join(path_jars, 'conllu2conll.py')\n",
"!python {path_conllu2conll} {inputFormat} {convertFolder} {originalID} {originalForm} {originalXpos} {parentheses} {quotationMarks} {orderPunc} {orderConj} {orderMWE} {track} {dt} {sentOutTmp} {reduce_tree}\n",
"!python {path_conllu2conll} {inputFormat} {convertFolder} {originalID} {originalForm} {originalXpos} {parentheses} {quotationMarks} {orderPunc} {orderConj} {orderMWE} {track} {dt} {sentOutTmp} {reduce_tree} {adposition}\n",
"# Code for offline usage; replace previous line by following:\n",
"# subprocess.call(['python', 'conllu2conll.py', inputFormat, convertFolder, originalID, originalForm, originalXpos, parentheses, quotationMarks, orderPunc, orderConj, orderMWE, track, dt, sentOutTmp, reduce_tree])\n",
"\n",
Expand Down Expand Up @@ -418,11 +422,12 @@
" for line in proc.stdout:\n",
" sys.stdout.write(line)\n",
" logfile.write(line)\n",
" \n",
"try:\n",
" shutil.rmtree(tmpIn)\n",
"except Exception as e:\n",
" print(e)\n",
"\n",
"if keep_intermediate_files == 'no': \n",
" try:\n",
" shutil.rmtree(tmpIn)\n",
" except Exception as e:\n",
" print(e)\n",
"\n",
"print('\\n==============================\\nConcatenating output files...\\n==============================')\n",
"# File concatenation: the big files that had been split in smaller files are brought back together.\n",
Expand Down Expand Up @@ -458,10 +463,11 @@
" # Code for offline usage; replace previous line by following:\n",
" # subprocess.call(['python', 'concatenateFiles.py', sentOutTmp, sentOut, 'utf-8', 'utf-8', 'txt', 'sent', track, dt])\n",
"\n",
"try:\n",
" shutil.rmtree(tmpOut)\n",
"except Exception as e:\n",
" print(e)\n",
"if keep_intermediate_files == 'no': \n",
" try:\n",
" shutil.rmtree(tmpOut)\n",
" except Exception as e:\n",
" print(e)\n",
"\n",
"if debug == 'yes':\n",
" print('\\n==============================\\nChecking outputs...\\n==============================')\n",
Expand Down

0 comments on commit 8223a6a

Please sign in to comment.