% publications.bib

% Thesis archived in the ADDI repository (see url).
% {ProMeta} is braced so that styles which sentence-case titles keep its CamelCase.
% institution is inferred from the addi.ehu.es repository host -- TODO confirm.
% NOTE(review): biblatex @thesis also expects a type field (e.g. bachelor's/master's);
% it cannot be determined from this entry, so it is left for the author to add.
@thesis{etxaniz2021prometa,
  title       = {{ProMeta}: softwarearen garapenerako prozesuen definizio eta ezarpenerako sistema metaereduetan oinarrituta},
  author      = {Etxaniz, Julen},
  year        = {2021},
  date        = {2021-10-08},
  institution = {University of the Basque Country (UPV/EHU)},
  url         = {https://addi.ehu.es/handle/10810/53310},
  journal     = {ADDI},
  abstract    = {The objective of the project is to build a system for the definition and implementation of software development processes based on metamodels. In fact, there are several methodologies that are suitable for software development. It is important to define the information of these methodologies through models so that they can be managed flexibly in the future and improvements can be made. In addition, it is necessary to build a system that establishes a methodology using information from the model for use by development teams in projects. The OpenUP methodology was used for the development of the project and the CCII-N2016-02 standard for the drafting of the project documentation and memory.},
  keywords    = {Software Engineering, Web Development}
}

% Thesis archived in the ADDI repository (see url).
% Names use the "Last, First" form; the compound surname {Lopez de Lacalle} is braced
% because BibTeX would otherwise treat the lowercase "de" as a von particle and split it.
% institution is inferred from the addi.ehu.es repository host -- TODO confirm.
% NOTE(review): biblatex @thesis also expects a type field (e.g. bachelor's/master's);
% it cannot be determined from this entry, so it is left for the author to add.
@thesis{etxaniz2023grounding,
  title       = {Grounding Language Models for Compositional and Spatial Reasoning},
  author      = {Etxaniz, Julen and {Lopez de Lacalle}, Oier and Soroa, Aitor},
  year        = {2023},
  date        = {2023-06-30},
  institution = {University of the Basque Country (UPV/EHU)},
  url         = {https://addi.ehu.es/handle/10810/61827},
  journal     = {ADDI},
  abstract    = {Humans can learn to understand and process the distribution of space, and one of the initial tasks of Artificial Intelligence has been to show machines the relationships between space and the objects that appear in it. Humans naturally combine vision and textual information to acquire compositional and spatial relationships among objects, and when reading a text, we are able to mentally depict the spatial relationships that may appear in it. Thus, the visual differences between images depicting "a person sits and a dog stands" and "a person stands and a dog sits" are obvious for humans, but still not clear for automatic systems.

              In this project, we propose to evaluate grounded Neural Language models that can perform compositional and spatial reasoning. Neural Language models (LM) have shown impressive capabilities on many NLP tasks but, despite their success, they have been criticized for their lack of meaning. Vision-and-Language models (VLM), trained jointly on text and image data, have been offered as a response to such criticisms, but recent work has shown that these models struggle to ground spatial concepts properly. In the project, we evaluate state-of-the-art pre-trained and fine-tuned VLMs to understand their grounding level on compositional and spatial reasoning. We also propose a variety of methods to create synthetic datasets specially focused on compositional reasoning.

              We managed to accomplish all the objectives of this work. First, we improved the state-of-the-art in compositional reasoning. Next, we performed some zero-shot experiments on spatial reasoning. Finally, we explored three alternatives for synthetic dataset creation: text-to-image generation, image captioning and image retrieval. Code is released at https://github.com/juletx/spatial-reasoning and models are released at https://huggingface.co/juletxara.},
  keywords    = {Artificial Intelligence, Deep Learning, Natural Language Processing, Computer Vision, Grounding, Visual Reasoning, Compositional Reasoning, Spatial Reasoning}
}

% arXiv preprint 2308.01223 (cs.CL); booktitle records the venue NAACL 2024.
% Names use the "Last, First" form; the compound surname {Lopez de Lacalle} is braced
% because BibTeX would otherwise treat the lowercase "de" as a von particle and split it.
% {English} is braced so that sentence-casing styles keep the capital.
@article{etxaniz2023multilingual,
  title         = {Do Multilingual Language Models Think Better in {English}?},
  author        = {Etxaniz, Julen and Azkune, Gorka and Soroa, Aitor and {Lopez de Lacalle}, Oier and Artetxe, Mikel},
  booktitle     = {NAACL 2024},
  year          = {2023},
  date          = {2023-08-02},
  eprint        = {2308.01223},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  journal       = {arXiv},
  abstract      = {Translate-test is a popular technique to improve the performance of multilingual language models. This approach works by translating the input into English using an external machine translation system, and running inference over the translated input. However, these improvements can be attributed to the use of a separate translation system, which is typically trained on large amounts of parallel data not seen by the language model. In this work, we introduce a new approach called self-translate, which overcomes the need of an external translation system by leveraging the few-shot translation capabilities of multilingual language models. Experiments over 5 tasks show that self-translate consistently outperforms direct inference, demonstrating that language models are unable to leverage their full multilingual potential when prompted in non-English languages. Our code is available at https://github.com/juletx/self-translate.},
  keywords      = {Natural Language Processing, Large Language Models, Deep Learning, Multilinguality}
}

% arXiv preprint 2310.18018 (cs.CL); booktitle records the venue EMNLP 2023 Findings.
% Names use the "Last, First" form; the compound surname {Lopez de Lacalle} is braced
% because BibTeX would otherwise treat the lowercase "de" as a von particle and split it.
% The acronyms {NLP} and {LLM} are braced so that sentence-casing styles do not lowercase them.
@article{sainz2023nlp,
  title         = {{NLP} Evaluation in trouble: On the Need to Measure {LLM} Data Contamination for each Benchmark},
  author        = {Sainz, Oscar and Campos, Jon Ander and García-Ferrero, Iker and Etxaniz, Julen and {Lopez de Lacalle}, Oier and Agirre, Eneko},
  booktitle     = {EMNLP 2023 Findings},
  year          = {2023},
  date          = {2023-10-27},
  eprint        = {2310.18018},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  abstract      = {In this position paper, we argue that the classical evaluation on Natural Language Processing (NLP) tasks using annotated benchmarks is in trouble. The worst kind of data contamination happens when a Large Language Model (LLM) is trained on the test split of a benchmark, and then evaluated in the same benchmark. The extent of the problem is unknown, as it is not straightforward to measure. Contamination causes an overestimation of the performance of a contaminated model in a target benchmark and associated task with respect to their non-contaminated counterparts. The consequences can be very harmful, with wrong scientific conclusions being published while other correct ones are discarded. This position paper defines different levels of data contamination and argues for a community effort, including the development of automatic and semi-automatic measures to detect when data from a benchmark was exposed to a model, and suggestions for flagging papers with conclusions that are compromised by data contamination.},
  keywords      = {Natural Language Processing, Large Language Models, Evaluation, Data Contamination, Deep Learning}
}

% arXiv preprint 2403.20266 (cs.CL); booktitle records the venue ACL 2024.
% Names use the unambiguous "Last, First" form.
% {Latxa} and {Basque} are braced so that sentence-casing styles keep their capitals.
@article{etxaniz2024latxa,
  title         = {{Latxa}: An Open Language Model and Evaluation Suite for {Basque}},
  author        = {Etxaniz, Julen and Sainz, Oscar and Perez, Naiara and Aldabe, Itziar and Rigau, German and Agirre, Eneko and Ormazabal, Aitor and Artetxe, Mikel and Soroa, Aitor},
  booktitle     = {ACL 2024},
  year          = {2024},
  date          = {2024-03-29},
  eprint        = {2403.20266},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  abstract      = {We introduce Latxa, a family of large language models for Basque ranging from 7 to 70 billion parameters. Latxa is based on Llama 2, which we continue pretraining on a new Basque corpus comprising 4.3M documents and 4.2B tokens. Addressing the scarcity of high-quality benchmarks for Basque, we further introduce 4 multiple choice evaluation datasets: EusProficiency, comprising 5,169 questions from official language proficiency exams; EusReading, comprising 352 reading comprehension questions; EusTrivia, comprising 1,715 trivia questions from 5 knowledge areas; and EusExams, comprising 16,774 questions from public examinations. In our extensive evaluation, Latxa outperforms all previous open models we compare to by a large margin. In addition, it is competitive with GPT-4 Turbo in language proficiency and understanding, despite lagging behind in reading comprehension and knowledge-intensive tasks. Both the Latxa family of models, as well as our new pretraining corpora and evaluation datasets, are publicly available under open licenses at https://github.com/hitz-zentroa/latxa. Our suite enables reproducible research on methods to build LLMs for low-resource languages.},
  keywords      = {Natural Language Processing, Large Language Models, Deep Learning, Multilinguality, Basque}
}

% arXiv preprint 2404.06996 (cs.CL); booktitle records the venue NAACL 2024.
% Names use the unambiguous "Last, First" form.
% {XNLIeu}, {NLI} and {Basque} are braced so that sentence-casing styles keep their capitals.
@article{heredia2024xnlieu,
  title         = {{XNLIeu}: a dataset for cross-lingual {NLI} in {Basque}},
  author        = {Heredia, Maite and Etxaniz, Julen and Zulaika, Muitze and Saralegi, Xabier and Barnes, Jeremy and Soroa, Aitor},
  booktitle     = {NAACL 2024},
  year          = {2024},
  date          = {2024-04-10},
  eprint        = {2404.06996},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  abstract      = {XNLI is a popular Natural Language Inference (NLI) benchmark widely used to evaluate cross-lingual Natural Language Understanding (NLU) capabilities across languages. In this paper, we expand XNLI to include Basque, a low-resource language that can greatly benefit from transfer-learning approaches. The new dataset, dubbed XNLIeu, has been developed by first machine-translating the English XNLI corpus into Basque, followed by a manual post-edition step. We have conducted a series of experiments using mono- and multilingual LLMs to assess a) the effect of professional post-edition on the MT system; b) the best cross-lingual strategy for NLI in Basque; and c) whether the choice of the best cross-lingual strategy is influenced by the fact that the dataset is built by translation. The results show that post-edition is necessary and that the translate-train cross-lingual strategy obtains better results overall, although the gain is lower when tested in a dataset that has been built natively from scratch. Our code and datasets are publicly available under open licenses at https://github.com/hitz-zentroa/xnli-eu.},
  keywords      = {Natural Language Processing, Large Language Models, Deep Learning, Multilinguality, Basque}
}

% Project summary whose only venue field is booktitle, so it is typed @inproceedings
% (an @article would ignore booktitle and be left without any venue).
% The ampersand in booktitle is escaped as \& because a bare & is a LaTeX alignment character.
% {IKER-GAITU} and {Basque} are braced so that sentence-casing styles keep their capitals.
% The compound surname {De la Iglesia} is braced so BibTeX does not split it at the lowercase "la".
% "and others" is the literal BibTeX token for a truncated author list; the style renders it.
@inproceedings{agirre2024ikergaitu,
  title     = {{IKER-GAITU}: research on language technology for {Basque} and other low-resource languages},
  author    = {Agirre, Eneko and Aldabe, Itziar and Arregi, Xabier and Artetxe, Mikel and Atutxa, Unai and Azurmendi, Ekhi and {De la Iglesia}, Iker and Etxaniz, Julen and García-Romillo, Victor and Hernaez-Rioja, Inma and others},
  year      = {2024},
  date      = {2024-04-15},
  booktitle = {PROJECTS \& DEMOS SEPLN - CEDI 2024},
  abstract  = {The general objective of the IKER-GAITU project is to research on language technology to increase the presence of Basque in the digital environment. It will be carried out between 2023 and 2025 thanks to a grant from the Department of Culture and Language Policy of the Basque Government. Current techniques require enormous amounts of textual and oral data per language. On the other hand, the data available for Basque and other low-resource languages might not be enough to attain the same quality as larger languages with the current technology. For this reason, it is essential to research on language technology, so that low-resource languages are present with the same quality as the rest of the languages in these technologies. IKER-GAITU pursues the following research objectives: 1. A system that automatically captures the level of Basque proficiency, written and oral; 2. Bring personalized voice technology to people with disabilities; 3. Spontaneous voice transcription, both when Basque and Spanish are mixed and when there are several speakers; 4. Textual conversational systems in Basque that match the quality of the most powerful large language models. In this project summary we present the results for the first year. More information at https://hitz.eus/iker-gaitu.},
  keywords  = {Natural Language Processing, Large Language Models, Deep Learning, Multilinguality, Basque}
}