From 364cdf13fbf2909a67b9bb371e37f9d78318ff08 Mon Sep 17 00:00:00 2001
From: Denilson Barbosa
Date: Thu, 1 Jun 2017 13:40:26 -0600
Subject: [PATCH] Update WikiTextParser.java

Remove extra '#' from REDIRECT matcher. The '#' already appears in the JSON file with the language localization.
---
 src/main/java/edu/jhu/nlp/wikipedia/WikiTextParser.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/jhu/nlp/wikipedia/WikiTextParser.java b/src/main/java/edu/jhu/nlp/wikipedia/WikiTextParser.java
index 4e75369..45014be 100755
--- a/src/main/java/edu/jhu/nlp/wikipedia/WikiTextParser.java
+++ b/src/main/java/edu/jhu/nlp/wikipedia/WikiTextParser.java
@@ -78,7 +78,7 @@ private void findRedirect(String wikiText) {
      * Create localized patterns (given the language in the constructor) for redirects, stubs, etc.
      */
     private void createPatterns(){
-        redirectPattern = Pattern.compile("#"+language.getLocalizedRedirectLabel()+"\\s*\\[\\[(.*?)\\]\\]", Pattern.CASE_INSENSITIVE);
+        redirectPattern = Pattern.compile(language.getLocalizedRedirectLabel()+"\\s*\\[\\[(.*?)\\]\\]", Pattern.CASE_INSENSITIVE);
         stubPattern = Pattern.compile("\\-"+language.getLocalizedStubLabel()+"\\}\\}", Pattern.CASE_INSENSITIVE);
         disambiguationPattern = Pattern.compile("\\{\\{"+language.getDisambiguationLabel()+"\\}\\}", Pattern.CASE_INSENSITIVE);
     }