(C) 2024 NLP-Lab
More details about the Hoosier Ellipsis Corpus (THEC) can be found on the NLP-Lab pages. The GitHub repo contains links to the sub-corpora for other languages, as well as useful code and scripts for data processing.
This repo contains the Arabic Ellipsis Sub-corpus of THEC.
Consult the data format specification for details about the structure of the files and the annotation standard used.
Please use the following snippet to cite our work.
@inproceedings{cavar-etal-2024-typology,
  author    = {Cavar, Damir and Mompelat, Ludovic and Abdo, Muhammad},
  title     = {The Typology of Ellipsis: A Corpus for Linguistic Analysis and Machine Learning Applications},
  editor    = {Hahn, Michael and Sorokin, Alexey and Kumar, Ritesh and Shcherbakov, Andreas and Otmakhova, Yulia and Yang, Jinrui and Serikov, Oleg and Rani, Priya and Ponti, Edoardo M. and Murado{\u{g}}lu, Saliha and Gao, Rena and Cotterell, Ryan and Vylomova, Ekaterina},
  booktitle = {Proceedings of the 6th Workshop on Research in Computational Linguistic Typology and Multilingual NLP},
  publisher = {Association for Computational Linguistics},
  address   = {St. Julian's, Malta},
  month     = mar,
  year      = {2024},
  pages     = {46--54},
  url       = {https://aclanthology.org/2024.sigtyp-1.6},
}
@inproceedings{cavar-atal-2004-computing,
  author    = {Cavar, Damir and Tiganj, Zoran and Mompelat, Ludovic and Dickson, Billy},
  title     = {Computing Ellipsis Constructions: Comparing Classical {NLP} and {LLM} Approaches},
  booktitle = {2024 Meeting of the Society for Computation in Linguistics (SCiL)},
  month     = may,
  year      = {2024},
}