From 4524bda172c59121bc3bf7b126a60722ec99a833 Mon Sep 17 00:00:00 2001 From: Jan Keim Date: Mon, 18 Sep 2023 10:53:14 +0200 Subject: [PATCH 01/16] Fix bugs in implementation of Text and Word Due to a mistake when calculating the length (mistakenly calculating string length, not word length), the last word in a text had the problem of raising an exception because the check if there is a next word did go wrong (as the nextWordIndex was always smaller than the actual maximum index in the words list) --- .../textproviderjson/textobject/TextImpl.java | 2 +- .../textproviderjson/textobject/WordImpl.java | 15 ++++++++++----- .../corenlp/textprocessor/TextProcessor.java | 7 +++---- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/TextImpl.java b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/TextImpl.java index 88647367e..6d8b72647 100644 --- a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/TextImpl.java +++ b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/TextImpl.java @@ -29,7 +29,7 @@ public void setSentences(ImmutableList sentences) { public int getLength() { int length = 0; for (Sentence sentence : sentences) { - length += sentence.getText().length(); + length += sentence.getWords().size(); } return length; } diff --git a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/WordImpl.java b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/WordImpl.java index 7703c8967..808b45f16 100644 --- a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/WordImpl.java +++ b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/WordImpl.java @@ -7,7 +7,12 @@ import org.eclipse.collections.api.factory.Lists; import org.eclipse.collections.api.list.ImmutableList; -import edu.kit.kastel.mcse.ardoco.core.api.text.*; +import edu.kit.kastel.mcse.ardoco.core.api.text.DependencyTag; +import edu.kit.kastel.mcse.ardoco.core.api.text.POSTag; +import edu.kit.kastel.mcse.ardoco.core.api.text.Phrase; +import edu.kit.kastel.mcse.ardoco.core.api.text.Sentence; +import edu.kit.kastel.mcse.ardoco.core.api.text.Text; +import edu.kit.kastel.mcse.ardoco.core.api.text.Word; public class WordImpl implements Word { @@ -125,13 +130,13 @@ public boolean equals(Object o) { return true; if (!(o instanceof WordImpl word)) return false; - return indexInText == word.indexInText && sentenceNo == word.sentenceNo && Objects.equals(preWord, word.preWord) && Objects.equals(nextWord, - word.nextWord) && Objects.equals(text, word.text) && posTag == word.posTag && Objects.equals(lemma, word.lemma) && Objects.equals( - ingoingDependencies, word.ingoingDependencies) && Objects.equals(outgoingDependencies, word.outgoingDependencies); + return indexInText == word.indexInText && sentenceNo == word.sentenceNo && Objects.equals(text, word.text) && posTag == word.posTag && Objects.equals( + lemma, word.lemma) && Objects.equals(ingoingDependencies, word.ingoingDependencies) && Objects.equals(outgoingDependencies, + word.outgoingDependencies); } @Override public int hashCode() { - return Objects.hash(indexInText, preWord, nextWord, sentenceNo, text, posTag, lemma, ingoingDependencies, outgoingDependencies); + return Objects.hash(indexInText, sentenceNo, text, posTag, lemma, ingoingDependencies, outgoingDependencies); } } diff --git a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessor.java b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessor.java index 163269750..bba62d495 100644 --- a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessor.java +++ b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessor.java @@ -37,9 +37,8 @@ public Text processText(String inputText) { int numberOfTry = 0; while (numberOfTry < MAX_FAILED_SERVICE_REQUESTS) { try { - Text processedText = processService(inputText); - logger.info("Processed text with CoreNLP microservice."); - return processedText; + logger.info("Processing text with CoreNLP microservice."); + return processService(inputText); } catch (IOException e) { numberOfTry++; logger.warn("Could not process text with CoreNLP microservice. Trying again. ", e); @@ -50,7 +49,7 @@ public Text processText(String inputText) { } logger.warn("Could not process text with CoreNLP microservice. Processing locally instead."); } - logger.info("Processed text locally."); + logger.info("Processing text locally."); return processLocally(inputText); } From 406977a9c754b1c7b09ce851d23b8faa1f25ab25 Mon Sep 17 00:00:00 2001 From: Jan Keim Date: Tue, 19 Sep 2023 09:25:46 +0200 Subject: [PATCH 02/16] Exchange GET with POST for microservice --- .../corenlp/config/ConfigManager.java | 2 +- .../textprocessor/HttpCommunicator.java | 22 +++++++++++++++ .../textprocessor/TextProcessorService.java | 27 ++++++++++++++++--- .../src/main/resources/config.properties | 4 +-- 4 files changed, 49 insertions(+), 6 deletions(-) diff --git a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/config/ConfigManager.java b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/config/ConfigManager.java index 3135eb6c6..8bcf043bb 100644 --- a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/config/ConfigManager.java +++ b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/config/ConfigManager.java @@ -32,7 +32,7 @@ private ConfigManager() { logger.warn("Could not load config file. ", e); properties.setProperty(PROPERTY_MICROSERVICE_URL, "http://localhost:8080"); properties.setProperty(PROPERTY_NLP_PROVIDER_SOURCE, "local"); - properties.setProperty(PROPERTY_CORENLP_SERVICE, "/stanfordnlp?text="); + properties.setProperty(PROPERTY_CORENLP_SERVICE, "/stanfordnlp"); properties.setProperty(PROPERTY_HEALTH_SERVICE, "/stanfordnlp/health"); } if (System.getenv("MICROSERVICE_URL") != null) { diff --git a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java index 4aa468dac..45dda409e 100644 --- a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java +++ b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java @@ -2,14 +2,18 @@ package edu.kit.kastel.mcse.ardoco.core.text.providers.informants.corenlp.textprocessor; import java.io.IOException; +import java.nio.charset.StandardCharsets; import org.apache.hc.client5.http.auth.AuthScope; import org.apache.hc.client5.http.auth.UsernamePasswordCredentials; import org.apache.hc.client5.http.classic.methods.HttpGet; +import org.apache.hc.client5.http.classic.methods.HttpPost; import org.apache.hc.client5.http.impl.auth.BasicCredentialsProvider; import org.apache.hc.client5.http.impl.classic.BasicHttpClientResponseHandler; import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; import org.apache.hc.client5.http.impl.classic.HttpClients; +import org.apache.hc.core5.http.ContentType; +import org.apache.hc.core5.http.io.entity.StringEntity; public class HttpCommunicator { @@ -27,4 +31,22 @@ public String sendAuthenticatedGetRequest(String requestUrl) throws IOException return httpClient.execute(request, new BasicHttpClientResponseHandler()); } } + + public String sendAuthenticatedPostRequest(String requestUrl, String body) throws IOException { + String username = System.getenv("USERNAME"); + String password = System.getenv("PASSWORD"); + if (username == null || password == null) { + throw new IOException("Environment variables USERNAME and PASSWORD must be set."); + } + + HttpPost request = new HttpPost(requestUrl); + StringEntity requestEntity = new StringEntity(body, ContentType.APPLICATION_JSON, StandardCharsets.UTF_8.toString(), false); //TODO + request.setEntity(requestEntity); + + BasicCredentialsProvider provider = new BasicCredentialsProvider(); + provider.setCredentials(new AuthScope(null, -1), new UsernamePasswordCredentials(username, password.toCharArray())); + try (CloseableHttpClient httpClient = HttpClients.custom().setDefaultCredentialsProvider(provider).build()) { + return httpClient.execute(request, new BasicHttpClientResponseHandler()); + } + } } diff --git a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessorService.java b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessorService.java index 59d281489..2ae386523 100644 --- a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessorService.java +++ b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessorService.java @@ -5,6 +5,10 @@ import java.net.URLEncoder; import java.nio.charset.StandardCharsets; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import edu.kit.kastel.mcse.ardoco.core.api.text.Text; import edu.kit.kastel.mcse.ardoco.core.text.providers.informants.corenlp.config.ConfigManager; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.converter.DtoToObjectConverter; @@ -17,6 +21,7 @@ * This text processor processes texts by sending requests to a microservice, which provides text processing using CoreNLP. */ public class TextProcessorService { + private static final Logger logger = LoggerFactory.getLogger(TextProcessorService.class); /** * processes and annotates a given text by sending requests to a microservice @@ -32,14 +37,30 @@ public Text processText(String inputText) throws IOException, InvalidJsonExcepti } private String sendCorenlpRequest(String inputText) throws IOException { - inputText = URLEncoder.encode(inputText, StandardCharsets.UTF_8); + String encodedText = encodeText(inputText); ConfigManager configManager = ConfigManager.INSTANCE; - String requestUrl = configManager.getMicroserviceUrl() + configManager.getCorenlpService() + inputText; - return sendAuthenticatedGetRequest(requestUrl); + String requestUrl = configManager.getMicroserviceUrl() + configManager.getCorenlpService(); + return sendAuthenticatedPostRequest(requestUrl, encodedText); + } + + private static String encodeText(String inputText) { + String encodedText = URLEncoder.encode(inputText, StandardCharsets.UTF_8); + return encodedText; } private String sendAuthenticatedGetRequest(String requestUrl) throws IOException { HttpCommunicator httpCommunicator = new HttpCommunicator(); return httpCommunicator.sendAuthenticatedGetRequest(requestUrl); } + + private String sendAuthenticatedPostRequest(String requestUrl, String encodedText) throws IOException { + HttpCommunicator httpCommunicator = new HttpCommunicator(); + String body = getRequestBodyString(encodedText); + return httpCommunicator.sendAuthenticatedPostRequest(requestUrl, body); + } + + @NotNull + private static String getRequestBodyString(String encodedText) { + return "{\"text\": \"" + encodedText + "\"}"; + } } diff --git a/stages/text-preprocessing/src/main/resources/config.properties b/stages/text-preprocessing/src/main/resources/config.properties index e25bff7ea..bf99a5e1e 100644 --- a/stages/text-preprocessing/src/main/resources/config.properties +++ b/stages/text-preprocessing/src/main/resources/config.properties @@ -1,5 +1,5 @@ nlpProviderSource=microservice -microserviceUrl= http://localhost:8080 -corenlpService=/stanfordnlp?text= +microserviceUrl=http://localhost:8080 +corenlpService=/stanfordnlp healthService=/stanfordnlp/health From d9b0abfb3274c4ce60e77d220161ee2359966e83 Mon Sep 17 00:00:00 2001 From: Jan Keim Date: Tue, 19 Sep 2023 12:20:54 +0200 Subject: [PATCH 03/16] Remove duplicate PosTag class --- .../mcse/ardoco/core/api/text/POSTag.java | 20 ++++++- .../converter/ObjectToDtoConverter.java | 12 ++-- .../core/textproviderjson/dto/PosTag.java | 58 ------------------- .../core/textproviderjson/dto/WordDto.java | 10 ++-- .../core/textproviderjson/TestUtil.java | 56 +++++++++--------- .../converter/JsonConverterTest.java | 4 +- 6 files changed, 65 insertions(+), 95 deletions(-) delete mode 100644 framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/dto/PosTag.java diff --git a/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/text/POSTag.java b/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/text/POSTag.java index 935df071f..eed06d529 100644 --- a/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/text/POSTag.java +++ b/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/text/POSTag.java @@ -1,12 +1,15 @@ /* Licensed under MIT 2021-2023. */ package edu.kit.kastel.mcse.ardoco.core.api.text; +import java.io.IOException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; /** * This class represents all valid part-of-speech (pos) tags - * */ public enum POSTag { //@formatter:off @@ -77,4 +80,19 @@ public boolean isVerb() { public boolean isNoun() { return getTag().startsWith("NN"); } + + @JsonValue + public String toValue() { + return getTag(); + } + + @JsonCreator + public static POSTag forValue(String value) throws IOException { + try { + return get(value); + } catch (IllegalArgumentException e) { + throw new IOException("Cannot deserialize PosTag"); + } + } + } diff --git a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/converter/ObjectToDtoConverter.java b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/converter/ObjectToDtoConverter.java index 0d19e99ff..6bf8b7ed2 100644 --- a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/converter/ObjectToDtoConverter.java +++ b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/converter/ObjectToDtoConverter.java @@ -9,10 +9,14 @@ import org.eclipse.collections.api.list.ImmutableList; -import edu.kit.kastel.mcse.ardoco.core.api.text.*; +import edu.kit.kastel.mcse.ardoco.core.api.text.DependencyTag; +import edu.kit.kastel.mcse.ardoco.core.api.text.POSTag; +import edu.kit.kastel.mcse.ardoco.core.api.text.Phrase; +import edu.kit.kastel.mcse.ardoco.core.api.text.Sentence; +import edu.kit.kastel.mcse.ardoco.core.api.text.Text; +import edu.kit.kastel.mcse.ardoco.core.api.text.Word; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.IncomingDependencyDto; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.OutgoingDependencyDto; -import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.PosTag; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.SentenceDto; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.TextDto; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.WordDto; @@ -27,7 +31,7 @@ public class ObjectToDtoConverter { /** * converts an ArDoCo text into a text DTO - * + * * @param text the ArDoCo text * @return the text DTO */ @@ -74,7 +78,7 @@ private WordDto convertToWordDTO(Word word) throws NotConvertableException { wordDTO.setText(word.getText()); wordDTO.setLemma(word.getLemma()); try { - wordDTO.setPosTag(PosTag.forValue(word.getPosTag().toString())); + wordDTO.setPosTag(POSTag.forValue(word.getPosTag().toString())); } catch (IOException e) { throw new NotConvertableException(String.format("IOException when converting word with id %d to WordDto: PosTag not found.", wordDTO.getId())); } diff --git a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/dto/PosTag.java b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/dto/PosTag.java deleted file mode 100644 index 8b9415729..000000000 --- a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/dto/PosTag.java +++ /dev/null @@ -1,58 +0,0 @@ -/* Licensed under MIT 2023. */ -package edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto; - -import java.io.IOException; - -import com.fasterxml.jackson.annotation.*; - -public enum PosTag { - //@formatter:off - ADJECTIVE("JJ"), ADJECTIVE_COMPARATIVE(ADJECTIVE + "R"), ADJECTIVE_SUPERLATIVE(ADJECTIVE + "S"), ADVERB("RB"), ADVERB_COMPARATIVE(ADVERB + "R"), ADVERB_SUPERLATIVE(ADVERB + "S"), ADVERB_WH( - "W" + ADVERB), CONJUNCTION_COORDINATING("CC"), CONJUNCTION_SUBORDINATING("IN"), CARDINAL_NUMBER("CD"), DETERMINER("DT"), DETERMINER_WH("W" + DETERMINER), EXISTENTIAL_THERE( - "EX"), FOREIGN_WORD("FW"), LIST_ITEM_MARKER("LS"), NOUN("NN"), NOUN_PLURAL(NOUN + "S"), NOUN_PROPER_SINGULAR(NOUN + "P"), NOUN_PROPER_PLURAL(NOUN + "PS"), PREDETERMINER( - "PDT"), POSSESSIVE_ENDING("POS"), PRONOUN_PERSONAL("PRP"), PRONOUN_POSSESSIVE("PRP$"), PRONOUN_POSSESSIVE_WH("WP$"), PRONOUN_WH("WP"), PARTICLE("RP"), SYMBOL("SYM"), TO( - "TO"), INTERJECTION("UH"), VERB("VB"), VERB_PAST_TENSE(VERB + "D"), VERB_PARTICIPLE_PRESENT(VERB + "G"), VERB_PARTICIPLE_PAST(VERB + "N"), VERB_SINGULAR_PRESENT_NONTHIRD_PERSON( - VERB + "P"), VERB_SINGULAR_PRESENT_THIRD_PERSON(VERB + "Z"), VERB_MODAL("MD"), CLOSER("."), COMMA(","), COLON(":"), LEFT_PAREN("-LRB-"), RIGHT_PAREN("-RRB-"), NONE("-NONE-"), OPEN_QUOTE( - "``"), CLOSE_QUOTE("''"), DOLLAR("$"), HASHTAG("#"), HYPH("HYPH"), NFP("NFP"), ADD("ADD"), AFX("AFX"), GW("GW"), XX("XX"); - //@formatter:on - - private final String tag; - - PosTag(String tag) { - this.tag = tag; - } - - /** - * Returns the encoding for this part-of-speech. - * - * @return A string representing a Penn Treebank encoding for an English part-of-speech. - */ - @Override - public String toString() { - return getTag(); - } - - /** - * Gets the tag information. - * - * @return the tag - */ - public String getTag() { - return tag; - } - - @JsonValue - public String toValue() { - return getTag(); - } - - @JsonCreator - public static PosTag forValue(String value) throws IOException { - for (PosTag v : values()) { - if (value.equals(v.getTag())) { - return v; - } - } - throw new IOException("Cannot deserialize PosTag"); - } -} diff --git a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/dto/WordDto.java b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/dto/WordDto.java index cb3f0c069..0886c8ac3 100644 --- a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/dto/WordDto.java +++ b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/dto/WordDto.java @@ -5,7 +5,9 @@ import java.util.List; import java.util.Objects; -import com.fasterxml.jackson.annotation.*; +import com.fasterxml.jackson.annotation.JsonProperty; + +import edu.kit.kastel.mcse.ardoco.core.api.text.POSTag; /** * Definition of a word @@ -17,7 +19,7 @@ public class WordDto { private List outgoingDependencies = new ArrayList<>(); private long sentenceNo; private String text; - private PosTag posTag; + private POSTag posTag; /** * The id of the word. Should be ascending from 1 for the first word in the text. @@ -72,12 +74,12 @@ public void setOutgoingDependencies(List value) { } @JsonProperty("posTag") - public PosTag getPosTag() { + public POSTag getPosTag() { return posTag; } @JsonProperty("posTag") - public void setPosTag(PosTag value) { + public void setPosTag(POSTag value) { this.posTag = value; } diff --git a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TestUtil.java b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TestUtil.java index e362e16ca..5efae0cb4 100644 --- a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TestUtil.java +++ b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TestUtil.java @@ -7,10 +7,14 @@ import org.eclipse.collections.api.factory.Lists; -import edu.kit.kastel.mcse.ardoco.core.api.text.*; +import edu.kit.kastel.mcse.ardoco.core.api.text.DependencyTag; +import edu.kit.kastel.mcse.ardoco.core.api.text.POSTag; +import edu.kit.kastel.mcse.ardoco.core.api.text.Phrase; +import edu.kit.kastel.mcse.ardoco.core.api.text.PhraseType; +import edu.kit.kastel.mcse.ardoco.core.api.text.Sentence; +import edu.kit.kastel.mcse.ardoco.core.api.text.Text; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.IncomingDependencyDto; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.OutgoingDependencyDto; -import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.PosTag; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.SentenceDto; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.TextDto; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.WordDto; @@ -30,7 +34,7 @@ private TestUtil() { /** * generates a default textDTO without dependencies between the words - * + * * @return a default textDTO */ public static TextDto generateDefaultDTO() throws IOException { @@ -39,28 +43,28 @@ public static TextDto generateDefaultDTO() throws IOException { word1.setSentenceNo(1); word1.setLemma("this"); word1.setText("This"); - word1.setPosTag(PosTag.forValue("DT")); + word1.setPosTag(POSTag.forValue("DT")); WordDto word2 = new WordDto(); word2.setId(2); word2.setSentenceNo(1); word2.setLemma("be"); word2.setText("is"); - word2.setPosTag(PosTag.forValue("VBZ")); + word2.setPosTag(POSTag.forValue("VBZ")); WordDto word3 = new WordDto(); word3.setId(3); word3.setSentenceNo(1); word3.setLemma("I"); word3.setText("me"); - word3.setPosTag(PosTag.forValue("PRP")); + word3.setPosTag(POSTag.forValue("PRP")); WordDto word4 = new WordDto(); word4.setId(4); word4.setSentenceNo(1); word4.setLemma("."); word4.setText("."); - word4.setPosTag(PosTag.forValue(".")); + word4.setPosTag(POSTag.forValue(".")); List words = new ArrayList<>(List.of(word1, word2, word3, word4)); @@ -87,9 +91,9 @@ public static TextDto generateDefaultDTO() throws IOException { public static Text generateDefaultText() { TextImpl text = new TextImpl(); List words = new ArrayList<>(List.of(new WordImpl(text, 0, 0, "This", POSTag.DETERMINER, "this", new ArrayList<>(), new ArrayList<>()), - new WordImpl(text, 1, 0, "is", POSTag.VERB_SINGULAR_PRESENT_THIRD_PERSON, "be", new ArrayList<>(), new ArrayList<>()), new WordImpl(text, 2, 0, - "me", POSTag.PRONOUN_PERSONAL, "I", new ArrayList<>(), new ArrayList<>()), new WordImpl(text, 3, 0, ".", POSTag.CLOSER, ".", - new ArrayList<>(), new ArrayList<>()))); + new WordImpl(text, 1, 0, "is", POSTag.VERB_SINGULAR_PRESENT_THIRD_PERSON, "be", new ArrayList<>(), new ArrayList<>()), + new WordImpl(text, 2, 0, "me", POSTag.PRONOUN_PERSONAL, "I", new ArrayList<>(), new ArrayList<>()), + new WordImpl(text, 3, 0, ".", POSTag.CLOSER, ".", new ArrayList<>(), new ArrayList<>()))); SentenceImpl sentence1 = new SentenceImpl(0, "This is me.", Lists.immutable.ofAll(words)); @@ -126,28 +130,28 @@ public static TextDto generateDTOWithMultipleSentences() throws IOException { word1.setSentenceNo(1); word1.setLemma("this"); word1.setText("This"); - word1.setPosTag(PosTag.forValue("DT")); + word1.setPosTag(POSTag.forValue("DT")); WordDto word2 = new WordDto(); word2.setId(2); word2.setSentenceNo(1); word2.setLemma("be"); word2.setText("is"); - word2.setPosTag(PosTag.forValue("VBZ")); + word2.setPosTag(POSTag.forValue("VBZ")); WordDto word3 = new WordDto(); word3.setId(3); word3.setSentenceNo(1); word3.setLemma("I"); word3.setText("me"); - word3.setPosTag(PosTag.forValue("PRP")); + word3.setPosTag(POSTag.forValue("PRP")); WordDto word4 = new WordDto(); word4.setId(4); word4.setSentenceNo(1); word4.setLemma("."); word4.setText("."); - word4.setPosTag(PosTag.forValue(".")); + word4.setPosTag(POSTag.forValue(".")); List words = new ArrayList<>(List.of(word1, word2, word3, word4)); @@ -162,28 +166,28 @@ public static TextDto generateDTOWithMultipleSentences() throws IOException { word5.setSentenceNo(2); word5.setLemma("this"); word5.setText("This"); - word5.setPosTag(PosTag.forValue("DT")); + word5.setPosTag(POSTag.forValue("DT")); WordDto word6 = new WordDto(); word6.setId(6); word6.setSentenceNo(2); word6.setLemma("be"); word6.setText("is"); - word6.setPosTag(PosTag.forValue("VBZ")); + word6.setPosTag(POSTag.forValue("VBZ")); WordDto word7 = new WordDto(); word7.setId(7); word7.setSentenceNo(2); word7.setLemma("you"); word7.setText("you"); - word7.setPosTag(PosTag.forValue("PRP")); + word7.setPosTag(POSTag.forValue("PRP")); WordDto word8 = new WordDto(); word8.setId(8); word8.setSentenceNo(2); word8.setLemma("."); word8.setText("."); - word8.setPosTag(PosTag.forValue(".")); + word8.setPosTag(POSTag.forValue(".")); List words2 = new ArrayList<>(List.of(word5, word6, word7, word8)); @@ -203,9 +207,9 @@ public static Text generateTextWithMultipleSentences() { List sentences = new ArrayList<>(); List words = new ArrayList<>(List.of(new WordImpl(text, 0, 0, "This", POSTag.DETERMINER, "this", new ArrayList<>(), new ArrayList<>()), - new WordImpl(text, 1, 0, "is", POSTag.VERB_SINGULAR_PRESENT_THIRD_PERSON, "be", new ArrayList<>(), new ArrayList<>()), new WordImpl(text, 2, 0, - "me", POSTag.PRONOUN_PERSONAL, "I", new ArrayList<>(), new ArrayList<>()), new WordImpl(text, 3, 0, ".", POSTag.CLOSER, ".", - new ArrayList<>(), new ArrayList<>()))); + new WordImpl(text, 1, 0, "is", POSTag.VERB_SINGULAR_PRESENT_THIRD_PERSON, "be", new ArrayList<>(), new ArrayList<>()), + new WordImpl(text, 2, 0, "me", POSTag.PRONOUN_PERSONAL, "I", new ArrayList<>(), new ArrayList<>()), + new WordImpl(text, 3, 0, ".", POSTag.CLOSER, ".", new ArrayList<>(), new ArrayList<>()))); SentenceImpl sentence1 = new SentenceImpl(0, "This is me.", Lists.immutable.ofAll(words)); @@ -222,9 +226,9 @@ public static Text generateTextWithMultipleSentences() { sentences.add(sentence1); List words2 = new ArrayList<>(List.of(new WordImpl(text, 4, 1, "This", POSTag.DETERMINER, "this", new ArrayList<>(), new ArrayList<>()), - new WordImpl(text, 5, 1, "is", POSTag.VERB_SINGULAR_PRESENT_THIRD_PERSON, "be", new ArrayList<>(), new ArrayList<>()), new WordImpl(text, 6, 1, - "you", POSTag.PRONOUN_PERSONAL, "you", new ArrayList<>(), new ArrayList<>()), new WordImpl(text, 7, 1, ".", POSTag.CLOSER, ".", - new ArrayList<>(), new ArrayList<>()))); + new WordImpl(text, 5, 1, "is", POSTag.VERB_SINGULAR_PRESENT_THIRD_PERSON, "be", new ArrayList<>(), new ArrayList<>()), + new WordImpl(text, 6, 1, "you", POSTag.PRONOUN_PERSONAL, "you", new ArrayList<>(), new ArrayList<>()), + new WordImpl(text, 7, 1, ".", POSTag.CLOSER, ".", new ArrayList<>(), new ArrayList<>()))); SentenceImpl sentence2 = new SentenceImpl(1, "This is you.", Lists.immutable.ofAll(words2)); Phrase subsubphrase2 = new PhraseImpl(Lists.immutable.of(words2.get(2)), PhraseType.NP, new ArrayList<>()); @@ -268,7 +272,7 @@ public static TextDto generateTextDtoWithDependencies() throws IOException { word1.setSentenceNo(1); word1.setLemma("hello"); word1.setText("Hello"); - word1.setPosTag(PosTag.forValue("UH")); + word1.setPosTag(POSTag.forValue("UH")); OutgoingDependencyDto outgoingDependency = new OutgoingDependencyDto(); outgoingDependency.setTargetWordId(2); outgoingDependency.setDependencyTag(DependencyTag.PUNCT); @@ -279,7 +283,7 @@ public static TextDto generateTextDtoWithDependencies() throws IOException { word2.setSentenceNo(1); word2.setLemma("."); word2.setText("."); - word2.setPosTag(PosTag.forValue(".")); + word2.setPosTag(POSTag.forValue(".")); IncomingDependencyDto incomingDependency = new IncomingDependencyDto(); incomingDependency.setSourceWordId(1); incomingDependency.setDependencyTag(DependencyTag.PUNCT); diff --git a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/converter/JsonConverterTest.java b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/converter/JsonConverterTest.java index 591438e1e..5e38ab8b8 100644 --- a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/converter/JsonConverterTest.java +++ b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/converter/JsonConverterTest.java @@ -11,9 +11,9 @@ import org.junit.jupiter.api.Test; import edu.kit.kastel.mcse.ardoco.core.api.text.DependencyTag; +import edu.kit.kastel.mcse.ardoco.core.api.text.POSTag; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.IncomingDependencyDto; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.OutgoingDependencyDto; -import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.PosTag; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.SentenceDto; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.TextDto; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.dto.WordDto; @@ -65,7 +65,7 @@ private TextDto getValidTextDtoExample() throws IOException { expectedWord.setSentenceNo(1); expectedWord.setLemma("hello"); expectedWord.setText("Hello"); - expectedWord.setPosTag(PosTag.forValue("UH")); + expectedWord.setPosTag(POSTag.forValue("UH")); OutgoingDependencyDto expectedOutDep = new OutgoingDependencyDto(); expectedOutDep.setTargetWordId(1); From 86a791b2822a2f1555eca9ee2ae266511e761825 Mon Sep 17 00:00:00 2001 From: Jan Keim Date: Tue, 19 Sep 2023 13:10:11 +0200 Subject: [PATCH 04/16] Update json-schema --- .../textproviderjson/textobject/TextImpl.java | 15 +- .../src/main/resources/schemas/text.json | 458 +++++++++--------- 2 files changed, 242 insertions(+), 231 deletions(-) diff --git a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/TextImpl.java b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/TextImpl.java index 6d8b72647..5ef3514d8 100644 --- a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/TextImpl.java +++ b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/TextImpl.java @@ -16,6 +16,8 @@ public class TextImpl implements Text { private ImmutableList words; + private int length = -1; + public TextImpl() { sentences = Lists.immutable.empty(); words = Lists.immutable.empty(); @@ -26,10 +28,13 @@ public void setSentences(ImmutableList sentences) { } @Override - public int getLength() { - int length = 0; - for (Sentence sentence : sentences) { - length += sentence.getWords().size(); + public synchronized int getLength() { + if (this.length < 0) { + int calculatedLength = 0; + for (Sentence sentence : sentences) { + calculatedLength += sentence.getWords().size(); + } + this.length = calculatedLength; } return length; } @@ -50,7 +55,7 @@ public ImmutableList getSentences() { private ImmutableList collectWords() { MutableList collectedWords = Lists.mutable.empty(); for (Sentence sentence : sentences) { - collectedWords.addAll(sentence.getWords().castToCollection()); + collectedWords.addAll(sentence.getWords().toList()); } return collectedWords.toImmutable(); } diff --git a/framework/text-provider-json/src/main/resources/schemas/text.json b/framework/text-provider-json/src/main/resources/schemas/text.json index 2ce17633f..ee5974868 100644 --- a/framework/text-provider-json/src/main/resources/schemas/text.json +++ b/framework/text-provider-json/src/main/resources/schemas/text.json @@ -1,237 +1,243 @@ { - "$schema": "http://json-schema.org/draft-04/schema#", - "$id": "https://raw.githubusercontent.com/ArDoCo/Core/main/framework/text-provider-json/src/main/resources/schemas/text.json", - "title": "Text", - "description": "A definition of a text", - "type": "object", - "required": [ - "sentences" - ], - "properties": { - "sentences": { - "description": "the words that are contained in this sentence", - "type": "array", - "uniqueItems": true, - "items": { - "description": "Sentence in a text", - "type": "object", - "required": [ - "sentenceNo", - "text", - "constituencyTree", - "words" - ], - "properties": { - "sentenceNo": { - "description": "index of the sentence", - "type": "integer", - "minimum": 0 - }, - "text": { - "description": "the text of the sentence", - "type": "string" - }, - "constituencyTree": { - "description": "the constituency tree of the sentence in bracket notation", - "type": "string" - }, - "words": { + "$schema": "https://json-schema.org/draft/2020-12/schema#", + "$id": "https://raw.githubusercontent.com/ArDoCo/Core/main/framework/text-provider-json/src/main/resources/schemas/text.json", + "title": "Text", + "description": "A definition of a text", + "type": "object", + "required": [ + "sentences" + ], + "properties": { + "sentences": { "description": "the words that are contained in this sentence", "type": "array", "uniqueItems": true, "items": { - "description": "Definition of a word", - "type": "object", - "required": [ - "sentenceNo", - "id", - "text", - "lemma", - "posTag", - "outgoingDependencies", - "incomingDependencies" - ], - "properties": { - "sentenceNo": { - "description": "index of the sentence the word is contained in", - "type": "integer", - "minimum": 0 - }, - "id": { - "description": "The id of the word. Should be ascending from 1 for the first word in the text.", - "type": "integer", - "minimum": 1 - }, - "text": { - "description": "the text of the word", - "type": "string" - }, - "lemma": { - "description": "the lemma of the word", - "type": "string" - }, - "posTag": { - "$ref": "#/$defs/posTags" - }, - "outgoingDependencies": { - "description": "the outgoing dependencies", - "type": "array", - "uniqueItems": false, - "items": { - "type": "object", - "required": [ - "targetWordId", - "dependencyType" - ], - "properties": { - "targetWordId": { - "description": "The id of the word the dependency points to.", - "type": "integer" - }, - "dependencyType": { - "$refs": "#/$defs/dependencyTypes" - } + "description": "Sentence in a text", + "type": "object", + "required": [ + "sentenceNo", + "text", + "constituencyTree", + "words" + ], + "properties": { + "sentenceNo": { + "description": "index of the sentence", + "type": "integer", + "minimum": 0 + }, + "text": { + "description": "the text of the sentence", + "type": "string" + }, + "constituencyTree": { + "description": "the constituency tree of the sentence in bracket notation", + "type": "string" + }, + "words": { + "description": "the words that are contained in this sentence", + "type": "array", + "uniqueItems": true, + "items": { + "description": "Definition of a word", + "type": "object", + "required": [ + "sentenceNo", + "id", + "text", + "lemma", + "posTag", + "outgoingDependencies", + "incomingDependencies" + ], + "properties": { + "sentenceNo": { + "description": "index of the sentence the word is contained in", + "type": "integer", + "minimum": 0 + }, + "id": { + "description": "The id of the word. Should be ascending from 1 for the first word in the text.", + "type": "integer", + "minimum": 1 + }, + "text": { + "description": "the text of the word", + "type": "string" + }, + "lemma": { + "description": "the lemma of the word", + "type": "string" + }, + "posTag": { + "$ref": "#/$defs/posTags" + }, + "outgoingDependencies": { + "description": "the outgoing dependencies", + "type": "array", + "uniqueItems": false, + "items": { + "type": "object", + "required": [ + "targetWordId", + "dependencyType" + ], + "properties": { + "targetWordId": { + "description": "The id of the word the dependency points to.", + "type": "integer" + }, + "dependencyType": { + "$refs": "#/$defs/dependencyTypes" + } + } + } + }, + "incomingDependencies": { + "description": "the incoming dependencies", + "type": "array", + "uniqueItems": false, + "items": { + "type": "object", + "required": [ + "sourceWordId", + "dependencyType" + ], + "properties": { + "sourceWordId": { + "description": "The id of the word the dependency originates from.", + "type": "integer" + }, + "dependencyType": { + "$refs": "#/$defs/dependencyTypes" + } + } + } + } + } + } } - } - }, - "incomingDependencies": { - "description": "the incoming dependencies", - "type": "array", - "uniqueItems": false, - "items": { - "type": "object", - "required": [ - "sourceWordId", - "dependencyType" - ], - "properties": { - "sourceWordId": { - "description": "The id of the word the dependency originates from.", - "type": "integer" - }, - "dependencyType": { - "$refs": "#/$defs/dependencyTypes" - } - } - } } - } } - } } - } - } - }, - "$defs": { - "posTags": { - "description": "the lemma of the word", - "type": "string", - "enum": [ - "JJ", - "JJR", - "JJS", - "RB", - "RBR", - "RBS", - "WRB", - "CC", - "IN", - "CD", - "DT", - "WDT", - "EX", - "FW", - "HYPH", - "LS", - "NN", - "NNS", - "NNP", - "NNPS", - "PDT", - "POS", - "PRP", - "PRP$", - "WP$", - "WP", - "RP", - "SYM", - "TO", - "UH", - "VB", - "VBD", - "VBG", - "VBN", - "VBP", - "VBZ", - "MD", - ".", - ",", - ":", - "-LRB-", - "-RRB-", - "-NONE-", - "``", - "''", - "$", - "#" - ] }, - "dependencyTypes": { - "description": "The valid dependency tags", - "type": "string", - "enum": [ - "APPOS", - "NSUBJ", - "POSS", - "OBJ", - "IOBJ", - "NMOD", - "NSUBJPASS", - "POBJ", - "AGENT", - "NUM", - "PREDET", - "RCMOD", - "CSUBJ", - "CCOMP", - "XCOMP", - "OBL", - "VOCATIVE", - "EXPL", - "DISLOCATED", - "ADVCL", - "ADVMOD", - "DISCOURSE", - "AUXILIARY", - "COP", - "MARK", - "ACL", - "AMOD", - "DET", - "CLF", - "CASE", - "CONJ", - "CC", - "FIXED", - "FLAT", - "COMPOUND", - "LIST", - "PARATAXIS", - "ORPHAN", - "GOES_WITH", - "REPARANDUM", - "PUNCT", - "CSUBJ_PASS", - "ACL_RELCL", - "COMPOUND_PRT", - "NMOD_POSS", - "REF", - "NSUBJ_XSUBJ", - "NSUBJ_PASS_XSUBJ", - "NSUBJ_RELSUBJ", - "NSUBJ_PASS_RELSUBJ", - "OBJ_RELOBJ" - ] + "$defs": { + "posTags": { + "description": "the lemma of the word", + "type": "string", + "enum": [ + "JJ", + "JJR", + "JJS", + "RB", + "RBR", + "RBS", + "WRB", + "CC", + "IN", + "CD", + "DT", + "WDT", + "EX", + "FW", + "HYPH", + "LS", + "NN", + "NNS", + "NNP", + "NNPS", + "PDT", + "POS", + "PRP", + "PRP$", + "WP$", + "WP", + "RP", + "SYM", + "TO", + "UH", + "VB", + "VBD", + "VBG", + "VBN", + "VBP", + "VBZ", + "MD", + ".", + ",", + ":", + "-LRB-", + "-RRB-", + "-NONE-", + "``", + "''", + "$", + "#", + "HYPH", + "NFP", + "ADD", + "AFX", + "GW", + "XX" + ] + }, + "dependencyTypes": { + "description": "The valid dependency tags", + "type": "string", + "enum": [ + "APPOS", + "NSUBJ", + "POSS", + "OBJ", + "IOBJ", + "NMOD", + "NSUBJPASS", + "POBJ", + "AGENT", + "NUM", + "PREDET", + "RCMOD", + "CSUBJ", + "CCOMP", + "XCOMP", + "OBL", + "VOCATIVE", + "EXPL", + "DISLOCATED", + "ADVCL", + "ADVMOD", + "DISCOURSE", + "AUXILIARY", + "COP", + "MARK", + "ACL", + "AMOD", + "DET", + "CLF", + "CASE", + "CONJ", + "CC", + "FIXED", + "FLAT", + "COMPOUND", + "LIST", + "PARATAXIS", + "ORPHAN", + "GOES_WITH", + "REPARANDUM", + "PUNCT", + "CSUBJ_PASS", + "ACL_RELCL", + "COMPOUND_PRT", + "NMOD_POSS", + "REF", + "NSUBJ_XSUBJ", + "NSUBJ_PASS_XSUBJ", + "NSUBJ_RELSUBJ", + "NSUBJ_PASS_RELSUBJ", + "OBJ_RELOBJ" + ] + } } - } -} \ No newline at end of file +} From 3d98b7e1c61f3ab11bb01456d2d7593700e8f80b Mon Sep 17 00:00:00 2001 From: Jan Keim Date: Tue, 19 Sep 2023 14:24:54 +0200 Subject: [PATCH 05/16] Add lazy initialization for text-related classes --- .../mcse/ardoco/core/api/text/Text.java | 8 ++ .../textobject/PhraseImpl.java | 101 ++++++++++-------- .../textproviderjson/textobject/TextImpl.java | 16 +++ .../textproviderjson/textobject/WordImpl.java | 8 +- .../informants/corenlp/TextImpl.java | 17 +++ .../informants/corenlp/WordImpl.java | 14 +-- .../textprocessor/TextProcessorService.java | 8 +- 7 files changed, 108 insertions(+), 64 deletions(-) diff --git a/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/text/Text.java b/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/text/Text.java index 138d0f3ea..5800c256a 100644 --- a/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/text/Text.java +++ b/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/text/Text.java @@ -24,6 +24,14 @@ default int getLength() { */ ImmutableList words(); + /** + * Returns the word at the given index + * + * @param index the index + * @return the word at the given index + */ + Word getWord(int index); + /** * Returns the sentences of the text, ordered by appearance. * diff --git a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java index c5476d34b..21565793e 100644 --- a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java +++ b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java @@ -22,24 +22,28 @@ public class PhraseImpl implements Phrase { private static final String PUNCTUATION_WITH_SPACE = "\\s+([.,;:?!])"; private static final String BRACKETS_WITH_SPACE = "\\s+([()\\[\\]{}<>])"; + private final PhraseType type; + private final ImmutableList childPhrases; private final ImmutableList nonPhraseWords; private ImmutableList phraseWords; - + private ImmutableList containedWords; + private ImmutableList subPhrases; + private ImmutableSortedMap phraseVector; + private int sentenceNo = -1; private String text; - private final PhraseType type; - - private final List childPhrases; - public PhraseImpl(ImmutableList nonPhraseWords, PhraseType type, List childPhrases) { this.nonPhraseWords = nonPhraseWords == null ? Lists.immutable.empty() : nonPhraseWords; this.type = type; - this.childPhrases = childPhrases; + this.childPhrases = Lists.immutable.ofAll(childPhrases); } @Override - public int getSentenceNo() { - return getContainedWords().get(0).getSentenceNo(); + public synchronized int getSentenceNo() { + if (sentenceNo < 0) { + sentenceNo = getContainedWords().get(0).getSentenceNo(); + } + return sentenceNo; } @Override @@ -60,70 +64,79 @@ public PhraseType getPhraseType() { } @Override - public ImmutableList getContainedWords() { - if (phraseWords == null) { - List collectedWords = new ArrayList<>(); - for (Phrase subphrase : childPhrases) { - collectedWords.addAll(subphrase.getContainedWords().castToList()); + public synchronized ImmutableList getContainedWords() { + if (containedWords == null) { + if (phraseWords == null) { + List collectedWords = new ArrayList<>(); + for (Phrase subphrase : childPhrases) { + collectedWords.addAll(subphrase.getContainedWords().castToList()); + } + this.phraseWords = Lists.immutable.ofAll(collectedWords); } - this.phraseWords = Lists.immutable.ofAll(collectedWords); + + MutableList words = Lists.mutable.ofAll(nonPhraseWords); + words.addAllIterable(phraseWords); + words.sortThis(Comparator.comparingInt(Word::getPosition)); + containedWords = words.toImmutable(); } - MutableList words = Lists.mutable.ofAll(nonPhraseWords); - words.addAllIterable(phraseWords); - words.sortThis(Comparator.comparingInt(Word::getPosition)); - return words.toImmutable(); + return containedWords; } @Override - public ImmutableList getSubPhrases() { - List subPhrases = new ArrayList<>(childPhrases); - for (Phrase childPhrase : childPhrases) { - subPhrases.addAll(childPhrase.getSubPhrases().toList()); + public synchronized ImmutableList getSubPhrases() { + if (subPhrases == null) { + MutableList tempSubPhrases = Lists.mutable.ofAll(childPhrases); + for (Phrase childPhrase : childPhrases) { + tempSubPhrases.addAll(childPhrase.getSubPhrases().toList()); + } + subPhrases = tempSubPhrases.toImmutable(); } - return Lists.immutable.ofAll(subPhrases); + return subPhrases; } @Override public boolean isSuperPhraseOf(Phrase other) { - List subphrases = this.childPhrases; + MutableList subphrases = Lists.mutable.ofAll(this.getSubPhrases()); while (!subphrases.isEmpty()) { if (subphrases.contains(other)) { return true; } - List newSubphrases = new ArrayList<>(); - for (Phrase subphrase : subphrases) { - newSubphrases.addAll(subphrase.getSubPhrases().castToList()); - } - subphrases = newSubphrases; + subphrases = getSubPhrasesOfPhrases(subphrases); } return false; } + private static MutableList getSubPhrasesOfPhrases(MutableList subphrases) { + MutableList newSubphrases = Lists.mutable.empty(); + for (Phrase subphrase : subphrases) { + newSubphrases.addAll(subphrase.getSubPhrases().castToList()); + } + subphrases = newSubphrases; + return subphrases; + } + @Override public boolean isSubPhraseOf(Phrase other) { - List subphrases = other.getSubPhrases().castToList(); + MutableList subphrases = Lists.mutable.ofAll(other.getSubPhrases()); while (!subphrases.isEmpty()) { if (subphrases.contains(this)) { return true; } - List newSubphrases = new ArrayList<>(); - for (Phrase subphrase : subphrases) { - newSubphrases.addAll(subphrase.getSubPhrases().castToList()); - } - subphrases = newSubphrases; + subphrases = getSubPhrasesOfPhrases(subphrases); } return false; } @Override - public ImmutableSortedMap getPhraseVector() { - MutableSortedMap phraseVector = SortedMaps.mutable.empty(); - - var grouped = getContainedWords().groupBy(Word::getText).toMap(); - grouped.forEach((key, value) -> phraseVector.put(value.getAny(), value.size())); - - return phraseVector.toImmutable(); + public synchronized ImmutableSortedMap getPhraseVector() { + if (this.phraseVector == null) { + MutableSortedMap phraseVector = SortedMaps.mutable.empty(); + var grouped = getContainedWords().groupBy(Word::getText).toMap(); + grouped.forEach((key, value) -> phraseVector.put(value.getAny(), value.size())); + this.phraseVector = phraseVector.toImmutable(); + } + return this.phraseVector; } @Override @@ -137,8 +150,8 @@ public boolean equals(Object o) { return true; if (!(o instanceof PhraseImpl phrase)) return false; - return Objects.equals(getContainedWords(), phrase.getContainedWords()) && Objects.equals(getText(), phrase.getText()) && type == phrase.type && Objects - .equals(childPhrases, phrase.childPhrases); + return Objects.equals(getContainedWords(), phrase.getContainedWords()) && Objects.equals(getText(), + phrase.getText()) && type == phrase.type && Objects.equals(childPhrases, phrase.childPhrases); } @Override diff --git a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/TextImpl.java b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/TextImpl.java index 5ef3514d8..e849717f3 100644 --- a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/TextImpl.java +++ b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/TextImpl.java @@ -2,6 +2,8 @@ package edu.kit.kastel.mcse.ardoco.core.textproviderjson.textobject; import java.util.Objects; +import java.util.SortedMap; +import java.util.TreeMap; import org.eclipse.collections.api.factory.Lists; import org.eclipse.collections.api.list.ImmutableList; @@ -15,6 +17,7 @@ public class TextImpl implements Text { private ImmutableList sentences; private ImmutableList words; + private final SortedMap wordsIndex = new TreeMap<>(); private int length = -1; @@ -43,10 +46,23 @@ public synchronized int getLength() { public ImmutableList words() { if (words.isEmpty()) { words = collectWords(); + int index = 0; + for (Word word : words) { + wordsIndex.put(index, word); + index++; + } } return words; } + @Override + public synchronized Word getWord(int index) { + if (wordsIndex.isEmpty()) { + words(); + } + return wordsIndex.get(index); + } + @Override public ImmutableList getSentences() { return sentences; diff --git a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/WordImpl.java b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/WordImpl.java index 808b45f16..bba6a7825 100644 --- a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/WordImpl.java +++ b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/WordImpl.java @@ -68,7 +68,7 @@ public POSTag getPosTag() { public Word getPreWord() { int preWordIndex = indexInText - 1; if (preWord == null && preWordIndex > 0) { - preWord = parent.words().get(preWordIndex); + preWord = parent.getWord(preWordIndex); } return preWord; } @@ -77,7 +77,7 @@ public Word getPreWord() { public Word getNextWord() { int nextWordIndex = indexInText + 1; if (nextWord == null && nextWordIndex < parent.getLength()) { - nextWord = parent.words().get(nextWordIndex); + nextWord = parent.getWord(nextWordIndex); } return nextWord; } @@ -95,14 +95,14 @@ public String getLemma() { @Override public ImmutableList getOutgoingDependencyWordsWithType(DependencyTag dependencyTag) { List dependenciesOfType = this.outgoingDependencies.stream().filter(x -> x.getDependencyTag() == dependencyTag).toList(); - List words = dependenciesOfType.stream().map(x -> this.parent.words().get((int) x.getWordId())).toList(); + List words = dependenciesOfType.stream().map(x -> this.parent.getWord((int) x.getWordId())).toList(); return Lists.immutable.ofAll(words); } @Override public ImmutableList getIncomingDependencyWordsWithType(DependencyTag dependencyTag) { List dependenciesOfType = this.ingoingDependencies.stream().filter(x -> x.getDependencyTag() == dependencyTag).toList(); - List words = dependenciesOfType.stream().map(x -> this.parent.words().get((int) x.getWordId())).toList(); + List words = dependenciesOfType.stream().map(x -> this.parent.getWord((int) x.getWordId())).toList(); return Lists.immutable.ofAll(words); } diff --git a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/TextImpl.java b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/TextImpl.java index 8f06559aa..6528703b3 100644 --- a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/TextImpl.java +++ b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/TextImpl.java @@ -1,6 +1,9 @@ /* Licensed under MIT 2022-2023. */ package edu.kit.kastel.mcse.ardoco.core.text.providers.informants.corenlp; +import java.util.SortedMap; +import java.util.TreeMap; + import org.eclipse.collections.api.factory.Lists; import org.eclipse.collections.api.list.ImmutableList; import org.eclipse.collections.api.list.MutableList; @@ -15,6 +18,7 @@ public class TextImpl implements Text { final CoreDocument coreDocument; private ImmutableList sentences = Lists.immutable.empty(); private ImmutableList words = Lists.immutable.empty(); + private final SortedMap wordsIndex = new TreeMap<>(); public TextImpl(CoreDocument coreDocument) { this.coreDocument = coreDocument; @@ -28,6 +32,14 @@ public ImmutableList words() { return words; } + @Override + public synchronized Word getWord(int index) { + if (wordsIndex.isEmpty()) { + words(); + } + return wordsIndex.get(index); + } + @Override public ImmutableList getSentences() { if (sentences.isEmpty()) { @@ -56,6 +68,11 @@ private void iterateDocumentForWordsAndSentences() { sentences = sentenceList.toImmutable(); words = wordList.toImmutable(); + int index = 0; + for (Word word : words) { + wordsIndex.put(index, word); + index++; + } } } diff --git a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/WordImpl.java b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/WordImpl.java index 02f20726e..c583e2f5e 100644 --- a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/WordImpl.java +++ b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/WordImpl.java @@ -84,7 +84,7 @@ public POSTag getPosTag() { public Word getPreWord() { int preWordIndex = index - 1; if (preWord == null && preWordIndex > 0) { - preWord = parent.words().get(preWordIndex); + preWord = parent.getWord(preWordIndex); } return preWord; } @@ -93,7 +93,7 @@ public Word getPreWord() { public Word getNextWord() { int nextWordIndex = index + 1; if (nextWord == null && nextWordIndex < parent.getLength()) { - nextWord = parent.words().get(nextWordIndex); + nextWord = parent.getWord(nextWordIndex); } return nextWord; } @@ -103,10 +103,6 @@ public int getPosition() { return index; } - protected int getPositionInSentence() { - return this.token.index(); - } - protected int getBeginCharPosition() { return this.token.beginPosition(); } @@ -148,7 +144,7 @@ public ImmutableList getIncomingDependencyWordsWithType(DependencyTag depe private Word getCorrespondingWordForFirstTokenBasedOnSecondToken(CoreLabel firstToken, CoreLabel secondToken) { var firstTokenIndex = (firstToken.index() - secondToken.index()) + index; - return parent.words().get(firstTokenIndex); + return parent.getWord(firstTokenIndex); } private List getDependenciesOfType(DependencyTag dependencyTag) { @@ -171,8 +167,8 @@ public boolean equals(Object o) { if (!(o instanceof WordImpl word)) return false; - return word.getText().equals(this.getText()) && getPosition() == word.getPosition() && getPosTag() == word.getPosTag() && getSentenceNo() == word - .getSentenceNo(); + return word.getText() + .equals(this.getText()) && getPosition() == word.getPosition() && getPosTag() == word.getPosTag() && getSentenceNo() == word.getSentenceNo(); } @Override diff --git a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessorService.java b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessorService.java index 2ae386523..ea6fbfaef 100644 --- a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessorService.java +++ b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessorService.java @@ -44,13 +44,7 @@ private String sendCorenlpRequest(String inputText) throws IOException { } private static String encodeText(String inputText) { - String encodedText = URLEncoder.encode(inputText, StandardCharsets.UTF_8); - return encodedText; - } - - private String sendAuthenticatedGetRequest(String requestUrl) throws IOException { - HttpCommunicator httpCommunicator = new HttpCommunicator(); - return httpCommunicator.sendAuthenticatedGetRequest(requestUrl); + return URLEncoder.encode(inputText, StandardCharsets.UTF_8); } private String sendAuthenticatedPostRequest(String requestUrl, String encodedText) throws IOException { From 2cbd62c46df73030f4b3549a01434f81b3ea14c4 Mon Sep 17 00:00:00 2001 From: Jan Keim Date: Wed, 20 Sep 2023 10:13:53 +0200 Subject: [PATCH 06/16] Formatting --- .../mcse/ardoco/core/api/text/POSTag.java | 1 + .../textobject/PhraseImpl.java | 4 ++-- .../textproviderjson/textobject/WordImpl.java | 2 +- .../ardoco/core/textproviderjson/TestUtil.java | 18 +++++++++--------- .../providers/informants/corenlp/WordImpl.java | 4 ++-- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/text/POSTag.java b/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/text/POSTag.java index eed06d529..a2e21f9cf 100644 --- a/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/text/POSTag.java +++ b/framework/common/src/main/java/edu/kit/kastel/mcse/ardoco/core/api/text/POSTag.java @@ -5,6 +5,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonValue; diff --git a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java index 21565793e..a2f627a6e 100644 --- a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java +++ b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java @@ -150,8 +150,8 @@ public boolean equals(Object o) { return true; if (!(o instanceof PhraseImpl phrase)) return false; - return Objects.equals(getContainedWords(), phrase.getContainedWords()) && Objects.equals(getText(), - phrase.getText()) && type == phrase.type && Objects.equals(childPhrases, phrase.childPhrases); + return Objects.equals(getContainedWords(), phrase.getContainedWords()) && Objects.equals(getText(), phrase.getText()) && type == phrase.type && Objects + .equals(childPhrases, phrase.childPhrases); } @Override diff --git a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/WordImpl.java b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/WordImpl.java index bba6a7825..3c048c0e5 100644 --- a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/WordImpl.java +++ b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/WordImpl.java @@ -132,7 +132,7 @@ public boolean equals(Object o) { return false; return indexInText == word.indexInText && sentenceNo == word.sentenceNo && Objects.equals(text, word.text) && posTag == word.posTag && Objects.equals( lemma, word.lemma) && Objects.equals(ingoingDependencies, word.ingoingDependencies) && Objects.equals(outgoingDependencies, - word.outgoingDependencies); + word.outgoingDependencies); } @Override diff --git a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TestUtil.java b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TestUtil.java index 5efae0cb4..4e2015f0c 100644 --- a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TestUtil.java +++ b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TestUtil.java @@ -91,9 +91,9 @@ public static TextDto generateDefaultDTO() throws IOException { public static Text generateDefaultText() { TextImpl text = new TextImpl(); List words = new ArrayList<>(List.of(new WordImpl(text, 0, 0, "This", POSTag.DETERMINER, "this", new ArrayList<>(), new ArrayList<>()), - new WordImpl(text, 1, 0, "is", POSTag.VERB_SINGULAR_PRESENT_THIRD_PERSON, "be", new ArrayList<>(), new ArrayList<>()), - new WordImpl(text, 2, 0, "me", POSTag.PRONOUN_PERSONAL, "I", new ArrayList<>(), new ArrayList<>()), - new WordImpl(text, 3, 0, ".", POSTag.CLOSER, ".", new ArrayList<>(), new ArrayList<>()))); + new WordImpl(text, 1, 0, "is", POSTag.VERB_SINGULAR_PRESENT_THIRD_PERSON, "be", new ArrayList<>(), new ArrayList<>()), new WordImpl(text, 2, 0, + "me", POSTag.PRONOUN_PERSONAL, "I", new ArrayList<>(), new ArrayList<>()), new WordImpl(text, 3, 0, ".", POSTag.CLOSER, ".", + new ArrayList<>(), new ArrayList<>()))); SentenceImpl sentence1 = new SentenceImpl(0, "This is me.", Lists.immutable.ofAll(words)); @@ -207,9 +207,9 @@ public static Text generateTextWithMultipleSentences() { List sentences = new ArrayList<>(); List words = new ArrayList<>(List.of(new WordImpl(text, 0, 0, "This", POSTag.DETERMINER, "this", new ArrayList<>(), new ArrayList<>()), - new WordImpl(text, 1, 0, "is", POSTag.VERB_SINGULAR_PRESENT_THIRD_PERSON, "be", new ArrayList<>(), new ArrayList<>()), - new WordImpl(text, 2, 0, "me", POSTag.PRONOUN_PERSONAL, "I", new ArrayList<>(), new ArrayList<>()), - new WordImpl(text, 3, 0, ".", POSTag.CLOSER, ".", new ArrayList<>(), new ArrayList<>()))); + new WordImpl(text, 1, 0, "is", POSTag.VERB_SINGULAR_PRESENT_THIRD_PERSON, "be", new ArrayList<>(), new ArrayList<>()), new WordImpl(text, 2, 0, + "me", POSTag.PRONOUN_PERSONAL, "I", new ArrayList<>(), new ArrayList<>()), new WordImpl(text, 3, 0, ".", POSTag.CLOSER, ".", + new ArrayList<>(), new ArrayList<>()))); SentenceImpl sentence1 = new SentenceImpl(0, "This is me.", Lists.immutable.ofAll(words)); @@ -226,9 +226,9 @@ public static Text generateTextWithMultipleSentences() { sentences.add(sentence1); List words2 = new ArrayList<>(List.of(new WordImpl(text, 4, 1, "This", POSTag.DETERMINER, "this", new ArrayList<>(), new ArrayList<>()), - new WordImpl(text, 5, 1, "is", POSTag.VERB_SINGULAR_PRESENT_THIRD_PERSON, "be", new ArrayList<>(), new ArrayList<>()), - new WordImpl(text, 6, 1, "you", POSTag.PRONOUN_PERSONAL, "you", new ArrayList<>(), new ArrayList<>()), - new WordImpl(text, 7, 1, ".", POSTag.CLOSER, ".", new ArrayList<>(), new ArrayList<>()))); + new WordImpl(text, 5, 1, "is", POSTag.VERB_SINGULAR_PRESENT_THIRD_PERSON, "be", new ArrayList<>(), new ArrayList<>()), new WordImpl(text, 6, 1, + "you", POSTag.PRONOUN_PERSONAL, "you", new ArrayList<>(), new ArrayList<>()), new WordImpl(text, 7, 1, ".", POSTag.CLOSER, ".", + new ArrayList<>(), new ArrayList<>()))); SentenceImpl sentence2 = new SentenceImpl(1, "This is you.", Lists.immutable.ofAll(words2)); Phrase subsubphrase2 = new PhraseImpl(Lists.immutable.of(words2.get(2)), PhraseType.NP, new ArrayList<>()); diff --git a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/WordImpl.java b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/WordImpl.java index c583e2f5e..c53b2d6c6 100644 --- a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/WordImpl.java +++ b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/WordImpl.java @@ -167,8 +167,8 @@ public boolean equals(Object o) { if (!(o instanceof WordImpl word)) return false; - return word.getText() - .equals(this.getText()) && getPosition() == word.getPosition() && getPosTag() == word.getPosTag() && getSentenceNo() == word.getSentenceNo(); + return word.getText().equals(this.getText()) && getPosition() == word.getPosition() && getPosTag() == word.getPosTag() && getSentenceNo() == word + .getSentenceNo(); } @Override From b093adc85f3160c44ddb94ac8b55255abab5d7e0 Mon Sep 17 00:00:00 2001 From: Jan Keim Date: Wed, 20 Sep 2023 10:53:35 +0200 Subject: [PATCH 07/16] Fix smells --- .../core/textproviderjson/textobject/PhraseImpl.java | 10 +++++----- .../corenlp/textprocessor/HttpCommunicator.java | 2 +- .../corenlp/textprocessor/TextProcessorService.java | 7 ++----- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java index a2f627a6e..fe40e736d 100644 --- a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java +++ b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java @@ -131,10 +131,10 @@ public boolean isSubPhraseOf(Phrase other) { @Override public synchronized ImmutableSortedMap getPhraseVector() { if (this.phraseVector == null) { - MutableSortedMap phraseVector = SortedMaps.mutable.empty(); + MutableSortedMap tempPhraseVector = SortedMaps.mutable.empty(); var grouped = getContainedWords().groupBy(Word::getText).toMap(); - grouped.forEach((key, value) -> phraseVector.put(value.getAny(), value.size())); - this.phraseVector = phraseVector.toImmutable(); + grouped.forEach((key, value) -> tempPhraseVector.put(value.getAny(), value.size())); + this.phraseVector = tempPhraseVector.toImmutable(); } return this.phraseVector; } @@ -150,8 +150,8 @@ public boolean equals(Object o) { return true; if (!(o instanceof PhraseImpl phrase)) return false; - return Objects.equals(getContainedWords(), phrase.getContainedWords()) && Objects.equals(getText(), phrase.getText()) && type == phrase.type && Objects - .equals(childPhrases, phrase.childPhrases); + return Objects.equals(getContainedWords(), phrase.getContainedWords()) && Objects.equals(getText(), + phrase.getText()) && type == phrase.type && Objects.equals(childPhrases, phrase.childPhrases); } @Override diff --git a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java index 45dda409e..c43888eb5 100644 --- a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java +++ b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java @@ -40,7 +40,7 @@ public String sendAuthenticatedPostRequest(String requestUrl, String body) throw } HttpPost request = new HttpPost(requestUrl); - StringEntity requestEntity = new StringEntity(body, ContentType.APPLICATION_JSON, StandardCharsets.UTF_8.toString(), false); //TODO + StringEntity requestEntity = new StringEntity(body, ContentType.APPLICATION_JSON, StandardCharsets.UTF_8.toString(), false); request.setEntity(requestEntity); BasicCredentialsProvider provider = new BasicCredentialsProvider(); diff --git a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessorService.java b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessorService.java index ea6fbfaef..f1c6412ef 100644 --- a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessorService.java +++ b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/TextProcessorService.java @@ -6,8 +6,6 @@ import java.nio.charset.StandardCharsets; import org.jetbrains.annotations.NotNull; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import edu.kit.kastel.mcse.ardoco.core.api.text.Text; import edu.kit.kastel.mcse.ardoco.core.text.providers.informants.corenlp.config.ConfigManager; @@ -21,7 +19,6 @@ * This text processor processes texts by sending requests to a microservice, which provides text processing using CoreNLP. */ public class TextProcessorService { - private static final Logger logger = LoggerFactory.getLogger(TextProcessorService.class); /** * processes and annotates a given text by sending requests to a microservice @@ -31,12 +28,12 @@ public class TextProcessorService { */ public Text processText(String inputText) throws IOException, InvalidJsonException, NotConvertableException { TextDto textDto; - String jsonText = sendCorenlpRequest(inputText); + String jsonText = sendCoreNlpRequest(inputText); textDto = JsonConverter.fromJsonString(jsonText); return new DtoToObjectConverter().convertText(textDto); } - private String sendCorenlpRequest(String inputText) throws IOException { + private String sendCoreNlpRequest(String inputText) throws IOException { String encodedText = encodeText(inputText); ConfigManager configManager = ConfigManager.INSTANCE; String requestUrl = configManager.getMicroserviceUrl() + configManager.getCorenlpService(); From cdb544bc63b2136742beb70b72845e3d60ef19f5 Mon Sep 17 00:00:00 2001 From: Jan Keim Date: Wed, 20 Sep 2023 11:22:12 +0200 Subject: [PATCH 08/16] Add tests for PhraseImpl and TextImpl --- .../textobject/PhraseImpl.java | 4 +- .../core/textproviderjson/PhraseImplTest.java | 81 +++++++++++++++++++ .../core/textproviderjson/TextImplTest.java | 58 +++++++++++++ 3 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java create mode 100644 framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TextImplTest.java diff --git a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java index fe40e736d..5eac91cf0 100644 --- a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java +++ b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java @@ -150,8 +150,8 @@ public boolean equals(Object o) { return true; if (!(o instanceof PhraseImpl phrase)) return false; - return Objects.equals(getContainedWords(), phrase.getContainedWords()) && Objects.equals(getText(), - phrase.getText()) && type == phrase.type && Objects.equals(childPhrases, phrase.childPhrases); + return Objects.equals(getContainedWords(), phrase.getContainedWords()) && Objects.equals(getText(), phrase.getText()) && type == phrase.type && Objects + .equals(childPhrases, phrase.childPhrases); } @Override diff --git a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java new file mode 100644 index 000000000..3146b8bcf --- /dev/null +++ b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java @@ -0,0 +1,81 @@ +/* Licensed under MIT 2023. */ +package edu.kit.kastel.mcse.ardoco.core.textproviderjson; + +import java.io.IOException; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import edu.kit.kastel.mcse.ardoco.core.api.text.Phrase; +import edu.kit.kastel.mcse.ardoco.core.api.text.Text; +import edu.kit.kastel.mcse.ardoco.core.textproviderjson.converter.DtoToObjectConverter; +import edu.kit.kastel.mcse.ardoco.core.textproviderjson.error.NotConvertableException; + +class PhraseImplTest { + + private static final DtoToObjectConverter CONVERTER = new DtoToObjectConverter(); + private static Phrase baselinePhrase; + private Phrase phraseImplInstance; + + @BeforeAll + static void initAll() { + Text baselineText = TestUtil.generateTextWithMultipleSentences(); + baselinePhrase = baselineText.getSentences().get(1).getPhrases().get(0); + } + + @BeforeEach + void init() { + try { + Text textImplInstance = CONVERTER.convertText(TestUtil.generateDTOWithMultipleSentences()); + phraseImplInstance = textImplInstance.getSentences().get(1).getPhrases().get(0); + } catch (NotConvertableException | IOException e) { + throw new RuntimeException(e); + } + } + + @Test + void testGetSentenceNo() { + Assertions.assertEquals(baselinePhrase.getSentenceNo(), phraseImplInstance.getSentenceNo()); + } + + @Test + void testGetText() { + Assertions.assertEquals(baselinePhrase.getText(), phraseImplInstance.getText()); + } + + @Test + void testGetPhraseType() { + Assertions.assertEquals(baselinePhrase.getPhraseType(), phraseImplInstance.getPhraseType()); + } + + @Test + void testGetContainedWords() { + Assertions.assertEquals(baselinePhrase.getContainedWords().size(), phraseImplInstance.getContainedWords().size()); + } + + @Test + void testGetSubPhrases() { + Assertions.assertEquals(baselinePhrase.getSubPhrases().size(), phraseImplInstance.getSubPhrases().size()); + } + + @Test + void testIsSuperPhraseOf() { + Phrase subphrase = phraseImplInstance.getSubPhrases().get(0); + Assertions.assertAll(// + () -> Assertions.assertTrue(phraseImplInstance.isSuperPhraseOf(subphrase)), () -> Assertions.assertFalse(phraseImplInstance.isSuperPhraseOf( + phraseImplInstance)), () -> Assertions.assertFalse(subphrase.isSuperPhraseOf(phraseImplInstance))// + ); + } + + @Test + void testIsSubPhraseOf() { + Phrase subphrase = phraseImplInstance.getSubPhrases().get(0); + Assertions.assertAll(// + () -> Assertions.assertFalse(phraseImplInstance.isSubPhraseOf(subphrase)), () -> Assertions.assertFalse(phraseImplInstance.isSubPhraseOf( + phraseImplInstance)), () -> Assertions.assertTrue(subphrase.isSubPhraseOf(phraseImplInstance))// + ); + } + +} diff --git a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TextImplTest.java b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TextImplTest.java new file mode 100644 index 000000000..3ea0a99c0 --- /dev/null +++ b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TextImplTest.java @@ -0,0 +1,58 @@ +/* Licensed under MIT 2023. */ +package edu.kit.kastel.mcse.ardoco.core.textproviderjson; + +import java.io.IOException; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import edu.kit.kastel.mcse.ardoco.core.api.text.Text; +import edu.kit.kastel.mcse.ardoco.core.textproviderjson.converter.DtoToObjectConverter; +import edu.kit.kastel.mcse.ardoco.core.textproviderjson.error.NotConvertableException; + +class TextImplTest { + private static final DtoToObjectConverter CONVERTER = new DtoToObjectConverter(); + private static Text baselineText; + private Text textImplInstance; + + @BeforeAll + static void initAll() { + baselineText = TestUtil.generateTextWithMultipleSentences(); + } + + @BeforeEach + void init() { + try { + textImplInstance = CONVERTER.convertText(TestUtil.generateDTOWithMultipleSentences()); + } catch (NotConvertableException | IOException e) { + throw new RuntimeException(e); + } + } + + @Test + void getLengthTest() { + Assertions.assertEquals(baselineText.getLength(), textImplInstance.getLength()); + } + + @Test + void wordsTest() { + Assertions.assertEquals(baselineText.words().size(), textImplInstance.words().size()); + } + + @Test + void getWordTest() { + Assertions.assertEquals(baselineText.getWord(0), textImplInstance.getWord(0)); + } + + @Test + void getSentencesTest() { + Assertions.assertEquals(baselineText.getSentences().size(), textImplInstance.getSentences().size()); + } + + @Test + void simpleHashCodeTest() { + Assertions.assertEquals(textImplInstance.hashCode(), textImplInstance.hashCode()); + } +} From e60cbb9a406f6fe6d1048eb5fa041ea117f50ac7 Mon Sep 17 00:00:00 2001 From: Jan Keim Date: Wed, 20 Sep 2023 12:26:44 +0200 Subject: [PATCH 09/16] Update tests --- .../core/textproviderjson/PhraseImplTest.java | 25 ++++++++++++++----- .../core/textproviderjson/TextImplTest.java | 5 ++-- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java index 3146b8bcf..f22bd4eaa 100644 --- a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java +++ b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java @@ -12,12 +12,13 @@ import edu.kit.kastel.mcse.ardoco.core.api.text.Text; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.converter.DtoToObjectConverter; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.error.NotConvertableException; +import edu.kit.kastel.mcse.ardoco.core.textproviderjson.textobject.PhraseImpl; class PhraseImplTest { private static final DtoToObjectConverter CONVERTER = new DtoToObjectConverter(); private static Phrase baselinePhrase; - private Phrase phraseImplInstance; + private PhraseImpl phraseImplInstance; @BeforeAll static void initAll() { @@ -29,7 +30,7 @@ static void initAll() { void init() { try { Text textImplInstance = CONVERTER.convertText(TestUtil.generateDTOWithMultipleSentences()); - phraseImplInstance = textImplInstance.getSentences().get(1).getPhrases().get(0); + phraseImplInstance = (PhraseImpl) textImplInstance.getSentences().get(1).getPhrases().get(0); } catch (NotConvertableException | IOException e) { throw new RuntimeException(e); } @@ -64,8 +65,9 @@ void testGetSubPhrases() { void testIsSuperPhraseOf() { Phrase subphrase = phraseImplInstance.getSubPhrases().get(0); Assertions.assertAll(// - () -> Assertions.assertTrue(phraseImplInstance.isSuperPhraseOf(subphrase)), () -> Assertions.assertFalse(phraseImplInstance.isSuperPhraseOf( - phraseImplInstance)), () -> Assertions.assertFalse(subphrase.isSuperPhraseOf(phraseImplInstance))// + () -> Assertions.assertTrue(phraseImplInstance.isSuperPhraseOf(subphrase)), + () -> Assertions.assertFalse(phraseImplInstance.isSuperPhraseOf(phraseImplInstance)), + () -> Assertions.assertFalse(subphrase.isSuperPhraseOf(phraseImplInstance))// ); } @@ -73,9 +75,20 @@ void testIsSuperPhraseOf() { void testIsSubPhraseOf() { Phrase subphrase = phraseImplInstance.getSubPhrases().get(0); Assertions.assertAll(// - () -> Assertions.assertFalse(phraseImplInstance.isSubPhraseOf(subphrase)), () -> Assertions.assertFalse(phraseImplInstance.isSubPhraseOf( - phraseImplInstance)), () -> Assertions.assertTrue(subphrase.isSubPhraseOf(phraseImplInstance))// + () -> Assertions.assertFalse(phraseImplInstance.isSubPhraseOf(subphrase)), + () -> Assertions.assertFalse(phraseImplInstance.isSubPhraseOf(phraseImplInstance)), + () -> Assertions.assertTrue(subphrase.isSubPhraseOf(phraseImplInstance))// ); } + @Test + void testGetPhraseVector() { + Assertions.assertEquals(baselinePhrase.getPhraseVector().size(), phraseImplInstance.getPhraseVector().size()); + } + + @Test + void simpleHashCodeTest() { + Assertions.assertEquals(phraseImplInstance.hashCode(), phraseImplInstance.hashCode()); + } + } diff --git a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TextImplTest.java b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TextImplTest.java index 3ea0a99c0..d3d3ee2eb 100644 --- a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TextImplTest.java +++ b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/TextImplTest.java @@ -11,11 +11,12 @@ import edu.kit.kastel.mcse.ardoco.core.api.text.Text; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.converter.DtoToObjectConverter; import edu.kit.kastel.mcse.ardoco.core.textproviderjson.error.NotConvertableException; +import edu.kit.kastel.mcse.ardoco.core.textproviderjson.textobject.TextImpl; class TextImplTest { private static final DtoToObjectConverter CONVERTER = new DtoToObjectConverter(); private static Text baselineText; - private Text textImplInstance; + private TextImpl textImplInstance; @BeforeAll static void initAll() { @@ -25,7 +26,7 @@ static void initAll() { @BeforeEach void init() { try { - textImplInstance = CONVERTER.convertText(TestUtil.generateDTOWithMultipleSentences()); + textImplInstance = (TextImpl) CONVERTER.convertText(TestUtil.generateDTOWithMultipleSentences()); } catch (NotConvertableException | IOException e) { throw new RuntimeException(e); } From 14dd67bc3c3162c07b2d91dc62339524b78268d3 Mon Sep 17 00:00:00 2001 From: Gram21 Date: Wed, 20 Sep 2023 10:27:49 +0000 Subject: [PATCH 10/16] Apply formatting changes --- .../ardoco/core/textproviderjson/PhraseImplTest.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java index f22bd4eaa..cb77b6bfc 100644 --- a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java +++ b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java @@ -65,9 +65,8 @@ void testGetSubPhrases() { void testIsSuperPhraseOf() { Phrase subphrase = phraseImplInstance.getSubPhrases().get(0); Assertions.assertAll(// - () -> Assertions.assertTrue(phraseImplInstance.isSuperPhraseOf(subphrase)), - () -> Assertions.assertFalse(phraseImplInstance.isSuperPhraseOf(phraseImplInstance)), - () -> Assertions.assertFalse(subphrase.isSuperPhraseOf(phraseImplInstance))// + () -> Assertions.assertTrue(phraseImplInstance.isSuperPhraseOf(subphrase)), () -> Assertions.assertFalse(phraseImplInstance.isSuperPhraseOf( + phraseImplInstance)), () -> Assertions.assertFalse(subphrase.isSuperPhraseOf(phraseImplInstance))// ); } @@ -75,9 +74,8 @@ void testIsSuperPhraseOf() { void testIsSubPhraseOf() { Phrase subphrase = phraseImplInstance.getSubPhrases().get(0); Assertions.assertAll(// - () -> Assertions.assertFalse(phraseImplInstance.isSubPhraseOf(subphrase)), - () -> Assertions.assertFalse(phraseImplInstance.isSubPhraseOf(phraseImplInstance)), - () -> Assertions.assertTrue(subphrase.isSubPhraseOf(phraseImplInstance))// + () -> Assertions.assertFalse(phraseImplInstance.isSubPhraseOf(subphrase)), () -> Assertions.assertFalse(phraseImplInstance.isSubPhraseOf( + phraseImplInstance)), () -> Assertions.assertTrue(subphrase.isSubPhraseOf(phraseImplInstance))// ); } From 4a75d3a1c56df7b370c9a928ae49128a535f58df Mon Sep 17 00:00:00 2001 From: Jan Keim Date: Wed, 20 Sep 2023 12:39:14 +0200 Subject: [PATCH 11/16] Add text-provider-json to report module pom --- .../ardoco/core/textproviderjson/PhraseImplTest.java | 10 ++++------ report/pom.xml | 5 +++++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java index f22bd4eaa..cb77b6bfc 100644 --- a/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java +++ b/framework/text-provider-json/src/test/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/PhraseImplTest.java @@ -65,9 +65,8 @@ void testGetSubPhrases() { void testIsSuperPhraseOf() { Phrase subphrase = phraseImplInstance.getSubPhrases().get(0); Assertions.assertAll(// - () -> Assertions.assertTrue(phraseImplInstance.isSuperPhraseOf(subphrase)), - () -> Assertions.assertFalse(phraseImplInstance.isSuperPhraseOf(phraseImplInstance)), - () -> Assertions.assertFalse(subphrase.isSuperPhraseOf(phraseImplInstance))// + () -> Assertions.assertTrue(phraseImplInstance.isSuperPhraseOf(subphrase)), () -> Assertions.assertFalse(phraseImplInstance.isSuperPhraseOf( + phraseImplInstance)), () -> Assertions.assertFalse(subphrase.isSuperPhraseOf(phraseImplInstance))// ); } @@ -75,9 +74,8 @@ void testIsSuperPhraseOf() { void testIsSubPhraseOf() { Phrase subphrase = phraseImplInstance.getSubPhrases().get(0); Assertions.assertAll(// - () -> Assertions.assertFalse(phraseImplInstance.isSubPhraseOf(subphrase)), - () -> Assertions.assertFalse(phraseImplInstance.isSubPhraseOf(phraseImplInstance)), - () -> Assertions.assertTrue(subphrase.isSubPhraseOf(phraseImplInstance))// + () -> Assertions.assertFalse(phraseImplInstance.isSubPhraseOf(subphrase)), () -> Assertions.assertFalse(phraseImplInstance.isSubPhraseOf( + phraseImplInstance)), () -> Assertions.assertTrue(subphrase.isSubPhraseOf(phraseImplInstance))// ); } diff --git a/report/pom.xml b/report/pom.xml index ad2095ee0..957c05cc7 100644 --- a/report/pom.xml +++ b/report/pom.xml @@ -116,6 +116,11 @@ + + io.github.ardoco.core + text-provider-json + ${revision} + org.junit.jupiter junit-jupiter-engine From 556dd1eae4f3aa13a611b399e24d3f625da2e289 Mon Sep 17 00:00:00 2001 From: Jan Keim Date: Thu, 21 Sep 2023 12:00:53 +0200 Subject: [PATCH 12/16] Update subphrase construction in remote PhraseImpl --- .../core/textproviderjson/textobject/PhraseImpl.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java index 5eac91cf0..8deecb43f 100644 --- a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java +++ b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java @@ -108,12 +108,11 @@ public boolean isSuperPhraseOf(Phrase other) { } private static MutableList getSubPhrasesOfPhrases(MutableList subphrases) { - MutableList newSubphrases = Lists.mutable.empty(); + MutableList subPhrasesOfPhrases = Lists.mutable.empty(); for (Phrase subphrase : subphrases) { - newSubphrases.addAll(subphrase.getSubPhrases().castToList()); + subPhrasesOfPhrases.addAll(subphrase.getSubPhrases().castToList()); } - subphrases = newSubphrases; - return subphrases; + return subPhrasesOfPhrases; } @Override @@ -150,8 +149,8 @@ public boolean equals(Object o) { return true; if (!(o instanceof PhraseImpl phrase)) return false; - return Objects.equals(getContainedWords(), phrase.getContainedWords()) && Objects.equals(getText(), phrase.getText()) && type == phrase.type && Objects - .equals(childPhrases, phrase.childPhrases); + return Objects.equals(getContainedWords(), phrase.getContainedWords()) && Objects.equals(getText(), + phrase.getText()) && type == phrase.type && Objects.equals(childPhrases, phrase.childPhrases); } @Override From ce49142a12d3889928f03757a1daa9c64a2e411a Mon Sep 17 00:00:00 2001 From: Jan Keim Date: Thu, 21 Sep 2023 12:01:36 +0200 Subject: [PATCH 13/16] Update env variables for microservice credentials --- .../corenlp/textprocessor/HttpCommunicator.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java index c43888eb5..20a42ecb5 100644 --- a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java +++ b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java @@ -17,11 +17,14 @@ public class HttpCommunicator { + public static final String ENV_USERNAME = "SCNLP_SERVICE_USER"; + public static final String ENV_PASSWORD = "SCNLP_SERVICE_PASSWORD"; + public String sendAuthenticatedGetRequest(String requestUrl) throws IOException { - String username = System.getenv("USERNAME"); - String password = System.getenv("PASSWORD"); + String username = System.getenv(ENV_USERNAME); + String password = System.getenv(ENV_PASSWORD); if (username == null || password == null) { - throw new IOException("Environment variables USERNAME and PASSWORD must be set."); + throw new IOException("Environment variables MS_USER and PASSWORD must be set."); } HttpGet request = new HttpGet(requestUrl); @@ -33,8 +36,8 @@ public String sendAuthenticatedGetRequest(String requestUrl) throws IOException } public String sendAuthenticatedPostRequest(String requestUrl, String body) throws IOException { - String username = System.getenv("USERNAME"); - String password = System.getenv("PASSWORD"); + String username = System.getenv(ENV_USERNAME); + String password = System.getenv(ENV_PASSWORD); if (username == null || password == null) { throw new IOException("Environment variables USERNAME and PASSWORD must be set."); } From 4a0eb320c3a3aaa8c54ba52a26c20859e541eef0 Mon Sep 17 00:00:00 2001 From: Jan Keim Date: Thu, 21 Sep 2023 16:47:34 +0200 Subject: [PATCH 14/16] Add information about env variables to README --- README.md | 34 +++++++++++++++++-- .../textobject/PhraseImpl.java | 4 +-- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 89920a0e2..6c51831d2 100644 --- a/README.md +++ b/README.md @@ -17,9 +17,12 @@ of _[KASTEL - Institute of Information Security and Dependability](https://kaste the [KIT](https://www.kit.edu). ## User Interfaces -To be able to execute the core algorithms from this repository, you can write own user interfaces that (should) use the [ArDoCoRunner](https://github.com/ArDoCo/Core/blob/main/pipeline/pipeline-core/src/main/java/edu/kit/kastel/mcse/ardoco/core/execution/runner/ArDoCoRunner.java). -We provide an example Command Line Interface (CLI) at [ArDoCo/CLI](https://github.com/ArDoCo/CLI) as well as a simple Graphical User Interface (GUI) at [ArDoCo/GUI](https://github.com/ArDoCo/GUI). +To be able to execute the core algorithms from this repository, you can write own user interfaces that (should) use +the [ArDoCoRunner](https://github.com/ArDoCo/Core/blob/main/pipeline/pipeline-core/src/main/java/edu/kit/kastel/mcse/ardoco/core/execution/runner/ArDoCoRunner.java). + +We provide an example Command Line Interface (CLI) at [ArDoCo/CLI](https://github.com/ArDoCo/CLI) as well as a simple Graphical User Interface (GUI) +at [ArDoCo/GUI](https://github.com/ArDoCo/GUI). Future user interfaces like an enhanced GUI or a web interface are planned. @@ -39,6 +42,7 @@ To test the Core, you could use case studies and benchmarks provided in .. ## Maven ```xml + io.github.ardoco.core @@ -49,7 +53,9 @@ To test the Core, you could use case studies and benchmarks provided in .. ``` For snapshot releases, make sure to add the following repository + ```xml + @@ -64,9 +70,31 @@ For snapshot releases, make sure to add the following repository ``` +## Microservice for text preprocessing + +Text preprocessing works locally, but there is also the option to host a microservice for this. +The benefit is that the models do not need to be loaded each time, saving some runtime (and local memory). + +The microservice can be found at [ArDoCo/StanfordCoreNLP-Provider-Service](https://github.com/ArDoCo/StanfordCoreNLP-Provider-Service/). + +The microservice is secured with credentials and the usage of the microservice needs to be activated and the URL of the microservice configured. +These settings can be provided to the execution via environment variables. +To do so, set the following variables: + +```env +NLP_PROVIDER_SOURCE=microservice +MICROSERVICE_URL=[microservice_url] +SCNLP_SERVICE_USER=[your_username] +SCNLP_SERVICE_PASSWORD=[your_password] +``` + +The first variable `NLP_PROVIDER_SOURCE=microservice` activates the microservice usage. +The next three variables configure the connection, and you need to provide the configuration for your deployed microservice. + ## Attribution -The initial version of this project is based on the master thesis [Linking Software Architecture Documentation and Models](https://doi.org/10.5445/IR/1000126194). +The initial version of this project is based on the master +thesis [Linking Software Architecture Documentation and Models](https://doi.org/10.5445/IR/1000126194). ## Acknowledgements diff --git a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java index 8deecb43f..e6b7f2a12 100644 --- a/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java +++ b/framework/text-provider-json/src/main/java/edu/kit/kastel/mcse/ardoco/core/textproviderjson/textobject/PhraseImpl.java @@ -149,8 +149,8 @@ public boolean equals(Object o) { return true; if (!(o instanceof PhraseImpl phrase)) return false; - return Objects.equals(getContainedWords(), phrase.getContainedWords()) && Objects.equals(getText(), - phrase.getText()) && type == phrase.type && Objects.equals(childPhrases, phrase.childPhrases); + return Objects.equals(getContainedWords(), phrase.getContainedWords()) && Objects.equals(getText(), phrase.getText()) && type == phrase.type && Objects + .equals(childPhrases, phrase.childPhrases); } @Override From 9a64795f5dd923af37c01dec29a775c6df1d3b17 Mon Sep 17 00:00:00 2001 From: Jan Keim Date: Fri, 22 Sep 2023 09:13:17 +0200 Subject: [PATCH 15/16] Update stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Dominik Fuchß --- .../informants/corenlp/textprocessor/HttpCommunicator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java index 20a42ecb5..358706859 100644 --- a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java +++ b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java @@ -39,7 +39,7 @@ public String sendAuthenticatedPostRequest(String requestUrl, String body) throw String username = System.getenv(ENV_USERNAME); String password = System.getenv(ENV_PASSWORD); if (username == null || password == null) { - throw new IOException("Environment variables USERNAME and PASSWORD must be set."); + throw new IOException("Environment variables " + ENV_USERNAME + " and " + ENV_PASSWORD + " must be set."); } HttpPost request = new HttpPost(requestUrl); From 3a574d4faa448f0674ee0229db5007739ff61e29 Mon Sep 17 00:00:00 2001 From: Jan Keim Date: Fri, 22 Sep 2023 14:06:45 +0200 Subject: [PATCH 16/16] Update stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Dominik Fuchß --- .../informants/corenlp/textprocessor/HttpCommunicator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java index 358706859..eb97cee3b 100644 --- a/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java +++ b/stages/text-preprocessing/src/main/java/edu/kit/kastel/mcse/ardoco/core/text/providers/informants/corenlp/textprocessor/HttpCommunicator.java @@ -24,7 +24,7 @@ public String sendAuthenticatedGetRequest(String requestUrl) throws IOException String username = System.getenv(ENV_USERNAME); String password = System.getenv(ENV_PASSWORD); if (username == null || password == null) { - throw new IOException("Environment variables MS_USER and PASSWORD must be set."); + throw new IOException("Environment variables " + ENV_USERNAME + " and " + ENV_PASSWORD + " must be set."); } HttpGet request = new HttpGet(requestUrl);