Skip to content

Commit

Permalink
[WIP]
Browse files Browse the repository at this point in the history
  • Loading branch information
vkhrystiuk-ks committed Jan 2, 2025
1 parent d4192f7 commit 0f823b9
Show file tree
Hide file tree
Showing 12 changed files with 246 additions and 80 deletions.
2 changes: 1 addition & 1 deletion src/main/java/liqp/LValue.java
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ public Map<String, Object> asMap(Object value) {
}

public static boolean isBlank(final String string) {
if (string == null || string.length() == 0)
if (string == null || string.isEmpty())
return true;

int l = string.length();
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/liqp/filters/date/fuzzy/LookupResult.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

import java.util.List;

class LookupResult {
public class LookupResult {

private final String name;
final List<Part> parts;
final boolean found;
final public boolean found;

LookupResult(String name, List<Part> parts, boolean found) {
public LookupResult(String name, List<Part> parts, boolean found) {
this.name = name;
this.parts = parts;
this.found = found;
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/liqp/filters/date/fuzzy/Part.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class NewPart implements Part {
final int end;
protected final String source;

NewPart(int start, int end, String source) {
public NewPart(int start, int end, String source) {
this.start = start;
this.end = end;
this.source = source;
Expand Down Expand Up @@ -106,7 +106,7 @@ class RecognizedPart implements Part {
protected final List<String> patterns;
public final String source;

RecognizedPart(int start, int end, List<String> patterns, String source) {
public RecognizedPart(int start, int end, List<String> patterns, String source) {
this.start = start;
this.end = end;
this.patterns = patterns;
Expand Down Expand Up @@ -148,7 +148,7 @@ public String toString() {
}

class RecognizedMonthNamePart extends RecognizedPart {
RecognizedMonthNamePart(int start, int end, List<String> patterns, String source) {
public RecognizedMonthNamePart(int start, int end, List<String> patterns, String source) {
super(start, end, patterns, source);
}

Expand Down
61 changes: 57 additions & 4 deletions src/main/java/liqp/filters/date/fuzzy/PartExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,75 @@
import java.util.List;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import liqp.filters.date.fuzzy.Part.NewPart;
import liqp.filters.date.fuzzy.Part.RecognizedMonthNamePart;
import liqp.filters.date.fuzzy.Part.RecognizedPart;
import liqp.filters.date.fuzzy.extractors.PartExtractorResult;

public interface PartExtractor {
public abstract class PartExtractor {

public PartExtractorResult extract(String source, List<Part> parts, int i) {
throw new UnsupportedOperationException("Not supported yet.");
}

PartExtractorResult extract(String source, List<Part> parts, int i);

default List<String> newList(String... el) {
protected List<String> newList(String... el) {
return Arrays.asList(el);
}

default List<String> appendToExisting(List<String> start, Supplier<List<String>> supplier) {
protected List<String> appendToExisting(List<String> start, Supplier<List<String>> supplier) {
if (start.isEmpty()) {
return supplier.get();
}
return start.stream()
.flatMap(prefix -> supplier.get().stream().map(suffix -> prefix + suffix))
.collect(Collectors.toList());
}

/**
 * Walks {@code parts} from the first element to the last and offers every part
 * whose state is {@link Part.PartState#NEW} to
 * {@link #extract(String, List, int)}.
 *
 * <p>The first successful match ends the scan; its result is converted into a
 * {@link LookupResult} by {@link #getLookupResult(List, int, PartExtractorResult)}.
 *
 * @param parts the parts to scan
 * @return the lookup result for the first match, or a result named
 *         {@code "<none>"} with {@code found == false} when no NEW part matched
 */
public LookupResult extract(List<Part> parts) {
    for (int index = 0; index < parts.size(); index++) {
        Part candidate = parts.get(index);

        // Only parts that are still NEW are offered to the extractor.
        if (candidate.state() != Part.PartState.NEW) {
            continue;
        }

        PartExtractorResult outcome = extract(candidate.source(), parts, index);
        if (outcome.found) {
            return getLookupResult(parts, index, outcome);
        }
    }
    return new LookupResult("<none>", parts, false);
}

/**
 * Replaces the part at index {@code i} with the pieces described by {@code per}:
 * an optional leading {@link NewPart}, the recognized part, and an optional
 * trailing {@link NewPart}.
 *
 * <p>{@code per.start} and {@code per.end} are used as offsets into the replaced
 * part's source text; the new parts' positions are those offsets shifted by the
 * replaced part's own {@code start()}.
 *
 * @param parts the list to modify in place
 * @param i     index of the part that was matched
 * @param per   the extractor result describing the matched range
 * @return a {@link LookupResult} with {@code found == true}, named after
 *         {@code per.extractorName} and wrapping the same {@code parts} list
 */
protected LookupResult getLookupResult(List<Part> parts, int i, PartExtractorResult per) {

    final Part replaced = parts.get(i);
    final String text = replaced.source();
    final int base = replaced.start();

    parts.remove(i);

    // Every piece is inserted at the same index i, so they are added in reverse
    // order (tail, match, head) to end up as head, match, tail.
    if (per.end != text.length()) {
        parts.add(i, new NewPart(base + per.end, replaced.end(), text.substring(per.end)));
    }

    final String matched = text.substring(per.start, per.end);
    final RecognizedPart recognized = per.isMonthName
            ? new RecognizedMonthNamePart(base + per.start, base + per.end, per.formatterPatterns, matched)
            : new RecognizedPart(base + per.start, base + per.end, per.formatterPatterns, matched);
    parts.add(i, recognized);

    if (per.start != 0) {
        parts.add(i, new NewPart(base, base + per.start, text.substring(0, per.start)));
    }

    return new LookupResult(per.extractorName, parts, true);
}

}
33 changes: 1 addition & 32 deletions src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -102,38 +102,7 @@ private boolean notSet(Boolean val) {
return val == null;
}
private LookupResult lookup(List<Part> parts, PartExtractor partExtractor) {
for (int i = 0; i < parts.size(); i++) {
Part part = parts.get(i);

if (part.state() == Part.PartState.NEW) {
String source = part.source();
PartExtractorResult per = partExtractor.extract(source, parts, i);
if (per.found) {
parts.remove(i);

if (per.end != source.length()) {
NewPart after = new NewPart(part.start() + per.end, part.end(), source.substring(per.end));
parts.add(i, after);
}

RecognizedPart recognized;
if (per.isMonthName) {
recognized = new RecognizedMonthNamePart(part.start() + per.start, part.start() + per.end, per.formatterPatterns, source.substring(per.start, per.end));
} else {
recognized = new RecognizedPart(part.start() + per.start, part.start() + per.end, per.formatterPatterns, source.substring(per.start, per.end));
}
parts.add(i, recognized);

if (per.start != 0) {
NewPart before = new NewPart(part.start(), part.start() + per.start, source.substring(0, per.start));
parts.add(i, before);
}

return new LookupResult(per.extractorName, parts, true);
}
}
}
return new LookupResult("<none>", parts, false);
return partExtractor.extract(parts);
}

private List<Part> markAsUnrecognized(List<Part> parts) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import liqp.filters.date.fuzzy.Part;
import liqp.filters.date.fuzzy.PartExtractor;

public class AllYMDPatternExtractor implements PartExtractor {
public class AllYMDPatternExtractor extends PartExtractor {

private final List<AnyYMDPatternExtractor> extractors = new ArrayList<>();

Expand Down
155 changes: 125 additions & 30 deletions src/main/java/liqp/filters/date/fuzzy/extractors/MonthDateExtractor.java
Original file line number Diff line number Diff line change
@@ -1,59 +1,154 @@
package liqp.filters.date.fuzzy.extractors;

import static liqp.LValue.isBlank;

import java.util.List;
import java.util.regex.Matcher;
import liqp.filters.date.fuzzy.LookupResult;
import liqp.filters.date.fuzzy.Part;
import liqp.filters.date.fuzzy.Part.NewPart;
import liqp.filters.date.fuzzy.Part.RecognizedMonthNamePart;
import liqp.filters.date.fuzzy.Part.RecognizedPart;
import liqp.filters.date.fuzzy.PartExtractor;

public class MonthDateExtractor implements PartExtractor {
public class MonthDateExtractor extends PartExtractor {

enum Mode {
SPACES_ONLY,
/**
* not ",;" so far
*/
NON_SEPARATORS
}
enum Direction {
LEFT,
RIGHT
}
@Override
public PartExtractorResult extract(String source, List<Part> parts, int i) {
// closest right or closest left should be a month
if (rightIsMonth(parts, i)) {
return leftDateExtractor.extract(source, parts, i);
public LookupResult extract(List<Part> parts) {
// 1. find named month
// if not - return empty result
// then look both left and right for a day
// comparing them (left vs right) by priority
// so the situation like ' 11 december, 11 ' vs '11, december 11' will be resolved
int monthIndex = lookForNamedMonth(parts);
if (monthIndex == -1) {
return new LookupResult("MonthDateExtractor", parts, false);
}

LookupResult rightResult = rightDate(monthIndex, parts, Mode.SPACES_ONLY);
LookupResult leftResult = leftDate(monthIndex, parts, Mode.SPACES_ONLY);

if (rightResult.found && leftResult.found) {
throw new IllegalArgumentException("Month have date candidates on both sides");
}
if (leftResult.found) {
return leftResult;
}
if (rightResult.found) {
return rightResult;
}


rightResult = rightDate(monthIndex, parts, Mode.NON_SEPARATORS);
leftResult = leftDate(monthIndex, parts, Mode.NON_SEPARATORS);
if (rightResult.found && leftResult.found) {
throw new IllegalArgumentException("Month have date candidates on both sides");
}
if (rightResult.found) {
return rightResult;
}
if (leftIsMonth(parts, i)) {
return rightDateExtractor.extract(source, parts, i);
if (leftResult.found) {
return leftResult;
}
return new PartExtractorResult("MonthDateExtractor");


return new LookupResult("MonthDateExtractor", parts, false);
}

private boolean leftIsMonth(List<Part> parts, int i) {
int left = i - 1;
while (left >= 0) {
Part part = parts.get(left);
if (part instanceof RecognizedMonthNamePart) {
return true;
private LookupResult leftDate(int monthIndex, List<Part> parts, Mode mode) {
RegexPartExtractor extractor = getExtractorByModeAndDirection(mode, Direction.LEFT);

int index = monthIndex - 1;
while (index >= 0) {
LookupResult result = locateDate(parts, extractor, index);
if (result != null) {
return result;
}
if (part instanceof RecognizedPart) {
return false;
index--;
}

return new LookupResult("MonthDateExtractor", parts, false);
}

private LookupResult rightDate(int monthIndex, List<Part> parts, Mode mode) {
RegexPartExtractor extractor = getExtractorByModeAndDirection(mode, Direction.RIGHT);

int index = monthIndex + 1;
while (index < parts.size()) {
LookupResult result = locateDate(parts, extractor, index);
if (result != null) {
return result;
}
left--;
index++;
}
return false;

return new LookupResult("MonthDateExtractor", parts, false);
}

private boolean rightIsMonth(List<Part> parts, int i) {
int right = i + 1;
while (right < parts.size()) {
Part part = parts.get(right);
if (part instanceof RecognizedMonthNamePart) {
return true;
private LookupResult locateDate(List<Part> parts, RegexPartExtractor extractor, int index) {
Part part = parts.get(index);
if (part instanceof RecognizedPart) {
return new LookupResult("MonthDateExtractor", parts, false);
}
if (part instanceof NewPart) {
NewPart newPart = (NewPart) part;
String source = newPart.source();
if (!isBlank(source) && extractor != null) {
PartExtractorResult leftResult = extractor.extract(source, parts, index);
if (leftResult.found) {
return getLookupResult(parts, index, leftResult);
}
}
if (part instanceof RecognizedPart) {
return false;
}
return null;
}

private RegexPartExtractor getExtractorByModeAndDirection(Mode mode, Direction direction) {
if (direction == Direction.LEFT) {
if (mode == Mode.SPACES_ONLY) {
return leftDateSpacesOnlyExtractor;
} else if (mode == Mode.NON_SEPARATORS) {
return leftDateExtractor;
}
} else {
if (mode == Mode.SPACES_ONLY) {
return rightDateSpacesOnlyExtractor;
} else if (mode == Mode.NON_SEPARATORS) {
return rightDateExtractor;
}
right++;
}
return false;
return null;
}

/**
 * Finds the first part that is a {@link RecognizedMonthNamePart}.
 *
 * @param parts the parts to search, in order
 * @return the index of the first such part, or {@code -1} if there is none
 */
private int lookForNamedMonth(List<Part> parts) {
    int position = 0;
    for (Part candidate : parts) {
        if (candidate instanceof RecognizedMonthNamePart) {
            return position;
        }
        position++;
    }
    return -1;
}

private static final RegexPartExtractor leftDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.left", "(?:^|.*?\\D)(?<day>0?[1-9]|[12][0-9]|3[01])\\D+?$");
private static final RegexPartExtractor rightDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.right", "^\\D+?(?<day>0?[1-9]|[12][0-9]|3[01])(?:$|\\D.*?)");
private static final RegexPartExtractor leftDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.left",
"(?:^|.*?\\D)(?<day>0?[1-9]|[12][0-9]|3[01])[^,\\d;]+?$");
private static final RegexPartExtractor leftDateSpacesOnlyExtractor = new MonthDatePartExtractor("MonthDayExtractor.left",
"(?:^|.*?\\D)(?<day>0?[1-9]|[12][0-9]|3[01])\\s+?$");
private static final RegexPartExtractor rightDateExtractor = new MonthDatePartExtractor("MonthDayExtractor.right",
"^[^,\\d;]+?(?<day>0?[1-9]|[12][0-9]|3[01])(?:$|\\D.*?)");
private static final RegexPartExtractor rightDateSpacesOnlyExtractor = new MonthDatePartExtractor("MonthDayExtractor.right",
"^\\s+?(?<day>0?[1-9]|[12][0-9]|3[01])(?:$|\\D.*?)");
private static class MonthDatePartExtractor extends RegexPartExtractor {

public MonthDatePartExtractor(String name, String regex) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import liqp.filters.date.fuzzy.Part;
import liqp.filters.date.fuzzy.PartExtractor;

public class MonthExtractor implements PartExtractor {
public class MonthExtractor extends PartExtractor {
private final List<EnumExtractor> monthExtractors;

public MonthExtractor(Locale locale) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import liqp.filters.date.fuzzy.Part;
import liqp.filters.date.fuzzy.PartExtractor;

public class PartExtractorDelegate implements PartExtractor {
public class PartExtractorDelegate extends PartExtractor {

protected PartExtractor delegate;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import liqp.filters.date.fuzzy.Part;
import liqp.filters.date.fuzzy.PartExtractor;

class RegexPartExtractor implements PartExtractor {
class RegexPartExtractor extends PartExtractor {

protected final String name;
protected final Pattern pattern;
Expand Down
Loading

0 comments on commit 0f823b9

Please sign in to comment.