diff --git a/.gitignore b/.gitignore
index 0212d0c..0a72841 100644
--- a/.gitignore
+++ b/.gitignore
@@ -174,3 +174,4 @@ buildNumber.properties
!/.mvn/wrapper/maven-wrapper.jar
# End of https://www.gitignore.io/api/intellij,jetbrains,java,maven,eclipse
+.idea
diff --git a/.idea/compiler.xml b/.idea/compiler.xml
deleted file mode 100644
index 4e42293..0000000
--- a/.idea/compiler.xml
+++ /dev/null
@@ -1,16 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/copyright/Lambda___GPL_v3_.xml b/.idea/copyright/Lambda___GPL_v3_.xml
deleted file mode 100644
index d201bc0..0000000
--- a/.idea/copyright/Lambda___GPL_v3_.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/copyright/Lambda___MIT_.xml b/.idea/copyright/Lambda___MIT_.xml
deleted file mode 100644
index c987fe2..0000000
--- a/.idea/copyright/Lambda___MIT_.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml
deleted file mode 100644
index ca4cabf..0000000
--- a/.idea/copyright/profiles_settings.xml
+++ /dev/null
@@ -1,3 +0,0 @@
-
-
-
\ No newline at end of file
diff --git a/.idea/encodings.xml b/.idea/encodings.xml
deleted file mode 100644
index b26911b..0000000
--- a/.idea/encodings.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
deleted file mode 100644
index 6612519..0000000
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
deleted file mode 100644
index 25ec0e9..0000000
--- a/.idea/modules.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 94a25f7..0000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/processing/DiscourseSimplifier.java b/src/main/java/org/lambda3/text/simplification/discourse/processing/DiscourseSimplifier.java
index 4a34555..eea1d2c 100644
--- a/src/main/java/org/lambda3/text/simplification/discourse/processing/DiscourseSimplifier.java
+++ b/src/main/java/org/lambda3/text/simplification/discourse/processing/DiscourseSimplifier.java
@@ -29,6 +29,8 @@
import org.lambda3.text.simplification.discourse.model.SimplificationContent;
import org.lambda3.text.simplification.discourse.runner.discourse_extraction.DiscourseExtractor;
import org.lambda3.text.simplification.discourse.runner.discourse_tree.DiscourseTreeCreator;
+import org.lambda3.text.simplification.discourse.runner.discourse_tree.Relation;
+import org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule;
import org.lambda3.text.simplification.discourse.utils.ConfigUtils;
import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
import org.lambda3.text.simplification.discourse.utils.sentences.SentencesUtils;
@@ -44,15 +46,16 @@
*
*/
public class DiscourseSimplifier {
- private final DiscourseTreeCreator discourseTreeCreator;
- private final DiscourseExtractor discourseExtractor;
+ private final List ignoredRelations;
+ private final List extractionRules;
+ private final SentencePreprocessor preprocessor;
private final Logger logger = LoggerFactory.getLogger(getClass());
public DiscourseSimplifier(Config config) {
- SentencePreprocessor preprocessor = new SentencePreprocessor(config);
- this.discourseTreeCreator = new DiscourseTreeCreator(config, preprocessor);
- this.discourseExtractor = new DiscourseExtractor(config);
+ this.preprocessor = new SentencePreprocessor(config);
+ this.ignoredRelations = DiscourseExtractor.extractIgnoredRelationsFromConfig(config);
+ this.extractionRules = DiscourseTreeCreator.extractRulesFromConfig(config);
logger.debug("DiscourseSimplifier initialized");
logger.debug("\n{}", ConfigUtils.prettyPrint(config));
@@ -89,7 +92,7 @@ public SimplificationContent doDiscourseSimplification(List sentences, P
// creates one discourse discourse_tree over all sentences (investigates intra-sentential and inter-sentential relations)
private SimplificationContent processWhole(List sentences) {
SimplificationContent content = new SimplificationContent();
-
+ DiscourseTreeCreator discourseTreeCreator = new DiscourseTreeCreator(extractionRules, preprocessor);
// Step 1) create document discourse discourse_tree
logger.info("### STEP 1) CREATE DOCUMENT DISCOURSE TREE ###");
discourseTreeCreator.reset();
@@ -120,6 +123,8 @@ private SimplificationContent processWhole(List sentences) {
// Step 2) do discourse extraction
logger.info("### STEP 2) DO DISCOURSE EXTRACTION ###");
+
+ DiscourseExtractor discourseExtractor = new DiscourseExtractor(ignoredRelations);
List elements = discourseExtractor.doDiscourseExtraction(discourseTreeCreator.getDiscourseTree());
elements.forEach(e -> content.addElement(e));
if (logger.isDebugEnabled()) {
@@ -133,6 +138,7 @@ private SimplificationContent processWhole(List sentences) {
// creates discourse trees for each individual sentence (investigates intra-sentential relations only)
private SimplificationContent processSeparate(List sentences) {
SimplificationContent content = new SimplificationContent();
+ DiscourseTreeCreator discourseTreeCreator = new DiscourseTreeCreator(extractionRules, preprocessor);
int idx = 0;
for (String sentence : sentences) {
@@ -153,6 +159,7 @@ private SimplificationContent processSeparate(List sentences) {
// Step 2) do discourse extraction
logger.debug("### STEP 2) DO DISCOURSE EXTRACTION ###");
+ DiscourseExtractor discourseExtractor = new DiscourseExtractor(ignoredRelations);
List elements = discourseExtractor.doDiscourseExtraction(discourseTreeCreator.getDiscourseTree());
elements.forEach(e -> outSentence.addElement(e));
logger.debug(outSentence.toString());
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_extraction/DiscourseExtractor.java b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_extraction/DiscourseExtractor.java
index ddcf0da..a88bf45 100644
--- a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_extraction/DiscourseExtractor.java
+++ b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_extraction/DiscourseExtractor.java
@@ -47,15 +47,12 @@ public class DiscourseExtractor {
private final Logger logger = LoggerFactory.getLogger(getClass());
private final List ignoredRelations;
- private final Config config;
private LinkedHashMap processedLeaves;
- public DiscourseExtractor(Config config) {
- this.config = config;
-
- // create ignored relations from config
- this.ignoredRelations = new ArrayList<>();
- for (String valueName : this.config.getStringList("ignored-relations")) {
+ public static List extractIgnoredRelationsFromConfig(Config config) {
+ Logger logger = LoggerFactory.getLogger(DiscourseExtractor.class);
+ List ignoredRelations = new ArrayList<>();
+ for (String valueName : config.getStringList("ignored-relations")) {
try {
Relation relation = Relation.valueOf(valueName);
ignoredRelations.add(relation);
@@ -64,10 +61,21 @@ public DiscourseExtractor(Config config) {
throw new ConfigException.BadValue("ignored-relations." + valueName, "Failed to create enum value.");
}
}
+ return ignoredRelations;
+ }
+ public DiscourseExtractor(List ignoredRelations) {
+ this.ignoredRelations = ignoredRelations;
this.processedLeaves = new LinkedHashMap();
}
+ public DiscourseExtractor(Config config) {
+
+ // create ignored relations from config
+ this(extractIgnoredRelationsFromConfig(config));
+
+ }
+
public List doDiscourseExtraction(DiscourseTree discourseTree) {
this.processedLeaves = new LinkedHashMap();
@@ -97,7 +105,7 @@ private void addAsContext(Leaf leaf, Leaf targetLeaf, Relation targetRelation) {
private void extractRec(DiscourseTree node, int contextLayer) {
if (node instanceof Leaf) {
- Leaf leaf = (Leaf)node;
+ Leaf leaf = (Leaf) node;
if (!leaf.isToSimpleContext()) {
// create new element
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/DiscourseTreeCreator.java b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/DiscourseTreeCreator.java
index 1d2d12a..62d12be 100644
--- a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/DiscourseTreeCreator.java
+++ b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/DiscourseTreeCreator.java
@@ -43,20 +43,21 @@
*
*/
public class DiscourseTreeCreator {
- private final Config config;
private final SentencePreprocessor preprocessor;
private final List rules;
private final Logger logger = LoggerFactory.getLogger(getClass());
private Coordination discourseTree;
- public DiscourseTreeCreator(Config config, SentencePreprocessor preprocessor) {
- this.config = config;
- this.preprocessor = preprocessor;
-
- // create rules from config
- this.rules = new ArrayList<>();
- for (String className : this.config.getStringList("rules")) {
+ /**
+ * Creates the extraction rules whose class names are listed under the "rules" key of the config.
+ *
+ * @throws ConfigException.BadValue if an instance of a listed class cannot be created
+ */
+ public static List extractRulesFromConfig(Config config) {
+ Logger logger = LoggerFactory.getLogger(DiscourseTreeCreator.class);
+ List rules = new ArrayList<>();
+ for (String className : config.getStringList("rules")) {
try {
Class> clazz = Class.forName(className);
Constructor> constructor = clazz.getConstructor();
@@ -69,7 +70,24 @@ public DiscourseTreeCreator(Config config, SentencePreprocessor preprocessor) {
throw new ConfigException.BadValue("rules." + className, "Failed to create instance.");
}
}
+ return rules;
+ }
+
+ /**
+ * Creates a tree creator whose rules are read from the "rules" key of the config.
+ */
+ public DiscourseTreeCreator(Config config, SentencePreprocessor preprocessor) {
+ // the delegated constructor already calls reset(), so it is not repeated here
+ this(DiscourseTreeCreator.extractRulesFromConfig(config), preprocessor);
+ }
+
+ /**
+ * Creates a tree creator that uses the given, already instantiated rules.
+ */
+ public DiscourseTreeCreator(List rules, SentencePreprocessor preprocessor) {
+ this.preprocessor = preprocessor;
+ this.rules = rules;
reset();
}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/ExtractionRule.java b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/ExtractionRule.java
index 99880d4..29e6322 100644
--- a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/ExtractionRule.java
+++ b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/extraction/ExtractionRule.java
@@ -64,6 +64,7 @@ public void setConfig(Config config) {
this.classifer = new CuePhraseClassifier(config);
}
+
public abstract Optional extract(Leaf leaf) throws ParseTreeException;
protected static List getSiblings(Tree parseTree, List tags) {
@@ -186,49 +187,49 @@ protected static List rephraseEnablement(Tree s, Tree vp) {
return res;
}
-
+
protected static String rephraseApposition(Tree vp, String np) {
String res = "";
Tense tense = getTense(vp);
//Number number = getNumber(np);
if (tense.equals(Tense.PRESENT)) {
- if (np.equals("NN") || np.equals("NNP")) {
- res = " is ";
- } else {
- res = " are ";
- }
+ if (np.equals("NN") || np.equals("NNP")) {
+ res = " is ";
+ } else {
+ res = " are ";
+ }
} else {
- if (np.equals("NN") || np.equals("NNP")) {
- res = " was ";
- } else {
- res = " were ";
- }
+ if (np.equals("NN") || np.equals("NNP")) {
+ res = " was ";
+ } else {
+ res = " were ";
+ }
}
-
+
return res;
}
-
+
protected static List rephraseAppositionNonRes(Tree vp, Tree np, Tree np2) {
List res = new ArrayList<>();
Tense tense = getTense(vp);
Number number = getNumber(np);
if (tense.equals(Tense.PRESENT)) {
- if (number.equals(Number.SINGULAR)) {
- res.add(new Word("is"));
- } else {
- res.add(new Word("are"));
- }
+ if (number.equals(Number.SINGULAR)) {
+ res.add(new Word("is"));
+ } else {
+ res.add(new Word("are"));
+ }
} else {
- if (number.equals(Number.SINGULAR)) {
- res.add(new Word("was"));
- } else {
- res.add(new Word("were"));
- }
+ if (number.equals(Number.SINGULAR)) {
+ res.add(new Word("was"));
+ } else {
+ res.add(new Word("were"));
+ }
}
res = appendWordsFromTree(res, np2);
-
+
return res;
}
@@ -245,7 +246,7 @@ protected static List getRephrasedParticipalS(Tree np, Tree vp, Tree s, Tr
if (matcher.findAt(s)) {
List res = new ArrayList<>();
- res.add(new Word((number.equals(Number.SINGULAR))? "has" : "have"));
+ res.add(new Word((number.equals(Number.SINGULAR)) ? "has" : "have"));
res.add(new Word("been"));
List next = ParseTreeExtractionUtils.getFollowingWords(s, matcher.getNode("vbn"), true);
if (next.size() > 0) {
@@ -260,7 +261,7 @@ protected static List getRephrasedParticipalS(Tree np, Tree vp, Tree s, Tr
if (matcher.findAt(s)) {
List res = new ArrayList<>();
- res.add(new Word((number.equals(Number.SINGULAR))? "has" : "have"));
+ res.add(new Word((number.equals(Number.SINGULAR)) ? "has" : "have"));
List next = ParseTreeExtractionUtils.getFollowingWords(s, matcher.getNode("vbn"), true);
if (next.size() > 0) {
next.set(0, WordsUtils.lowercaseWord(next.get(0)));
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/model/Leaf.java b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/model/Leaf.java
index 62a3027..154dc54 100644
--- a/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/model/Leaf.java
+++ b/src/main/java/org/lambda3/text/simplification/discourse/runner/discourse_tree/model/Leaf.java
@@ -57,6 +57,10 @@ public Leaf(String extractionRule, String text) throws ParseTreeException {
this(extractionRule, ParseTreeParser.parse(text));
}
+// public void saveTree() {
+//
+// }
+
public void dontAllowSplit() {
this.allowSplit = false;
}
@@ -73,6 +77,7 @@ public String getText() {
return WordsUtils.wordsToString(ParseTreeExtractionUtils.getContainingWords(parseTree));
}
+
public void setToSimpleContext(boolean toSimpleContext) {
this.toSimpleContext = toSimpleContext;
}