Skip to content

Commit

Permalink
[WIP]
Browse files Browse the repository at this point in the history
  • Loading branch information
vkhrystiuk-ks committed Dec 22, 2024
1 parent 7e66c66 commit 957e039
Show file tree
Hide file tree
Showing 14 changed files with 80 additions and 83 deletions.
5 changes: 3 additions & 2 deletions src/main/java/liqp/filters/date/fuzzy/LookupResult.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@

/**
 * Outcome of a single lookup pass over a list of parts.
 *
 * <p>Bundles the list of parts that was handed in, a flag telling whether anything was
 * recognized, and a name identifying what produced the result.
 */
class LookupResult {

// Name attached to this result. The visible caller passes the extractor name on a match
// and the literal "<none>" when nothing matched.
// NOTE(review): the field is private and this class exposes no accessor for it - confirm how it is meant to be read.
private final String name;
// The list of parts supplied to the constructor (stored as-is, not copied).
final List<Part> parts;
// Whether the lookup recognized something.
final boolean found;

/**
 * Creates a lookup result.
 *
 * @param name name identifying the source of this result
 * @param parts the list of parts to carry; the reference is stored without copying
 * @param found whether the lookup recognized something
 */
LookupResult(List<Part> parts, boolean found) {
LookupResult(String name, List<Part> parts, boolean found) {
this.name = name;
this.parts = parts;
this.found = found;
}

}
4 changes: 3 additions & 1 deletion src/main/java/liqp/filters/date/fuzzy/Part.java
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,13 @@ class RecognizedPart implements Part {
final int start;
final int end;
private final List<String> patterns;
public final String source;

/**
 * Creates a recognized part.
 *
 * @param start start offset of this part
 * @param end end offset of this part
 * @param patterns formatter patterns associated with this part; stored without copying
 * @param source text stored for this part
 *     (NOTE(review): the visible caller passes a substring of the text being examined - confirm
 *     it always corresponds to the start/end offsets given here)
 */
RecognizedPart(int start, int end, List<String> patterns) {
RecognizedPart(int start, int end, List<String> patterns, String source) {
this.start = start;
this.end = end;
this.patterns = patterns;
this.source = source;
}

@Override
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/liqp/filters/date/fuzzy/PartRecognizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -125,19 +125,19 @@ private LookupResult lookup(List<Part> parts, PartExtractor partExtractor) {
parts.add(i, after);
}

RecognizedPart recognized = new RecognizedPart(part.start() + per.start, part.start() + per.end, per.formatterPatterns);
RecognizedPart recognized = new RecognizedPart(part.start() + per.start, part.start() + per.end, per.formatterPatterns, source.substring(per.start, per.end));
parts.add(i, recognized);

if (per.start != 0) {
NewPart before = new NewPart(part.start(), part.start() + per.start, source.substring(0, per.start));
parts.add(i, before);
}

return new LookupResult(parts, true);
return new LookupResult(per.extractorName, parts, true);
}
}
}
return new LookupResult(parts, false);
return new LookupResult("<none>", parts, false);
}

private List<Part> markAsUnrecognized(List<Part> parts) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,52 +15,39 @@ public class AllYMDPatternExtractor implements PartExtractor {
private final List<AnyYMDPatternExtractor> extractors = new ArrayList<>();

/**
 * Registers one {@code AnyYMDPatternExtractor} per supported numeric date layout.
 *
 * <p>Each entry is described by an ordered sequence of rule parts (year, month, day and the
 * literal separators between them) and is appended to {@code extractors}. Entries with a
 * four-digit year rule ({@code pY4()}) are registered before those with a two-digit year rule
 * ({@code pY2()}).
 *
 * <p>NOTE(review): presumably the entries are tried in registration order, so for inputs that
 * fit several layouts (for example {@code 1/1/23}) the earlier entry wins - verify against
 * {@code extract(String)}.
 */
public AllYMDPatternExtractor() {
AnyYMDPatternExtractor iSO8601Y4MDPatternExtractor = new AnyYMDPatternExtractor(
pY4(), pp("-"), pM(), pp("-"), pD()); // yyyy-MM-dd
extractors.add(iSO8601Y4MDPatternExtractor);
AnyYMDPatternExtractor americanY4MDPatternExtractor = new AnyYMDPatternExtractor(
pM(), pp("/"), pD(), pp("/"), pY4()); // MM/dd/yyyy
extractors.add(americanY4MDPatternExtractor);
extractors.add(new AnyYMDPatternExtractor("iSO8601Y4MDPatternExtractor",
pY4(), pp("-"), pM(), pp("-"), pD())); // yyyy-MM-dd

extractors.add(new AnyYMDPatternExtractor("americanY4MDPatternExtractor",
pM(), pp("/"), pD(), pp("/"), pY4())); // MM/dd/yyyy
// next are top-rated locale formats, according to gpt
AnyYMDPatternExtractor indianY4MDPatternExtractor = new AnyYMDPatternExtractor(
pD(), pp("-"), pM(), pp("-"), pY4()); // d-M-yyyy
extractors.add(indianY4MDPatternExtractor);
AnyYMDPatternExtractor chineseY4MDPatternExtractor = new AnyYMDPatternExtractor(
pY4(), pp("/"), pM(), pp("/"), pD()); // yyyy/M/d
extractors.add(chineseY4MDPatternExtractor);
AnyYMDPatternExtractor englishY4MDPatternExtractor = new AnyYMDPatternExtractor(
pD(), pp("/"), pM(), pp("/"), pY4()); // d/M/yyyy
extractors.add(englishY4MDPatternExtractor);
AnyYMDPatternExtractor slavicY4MDPatternExtractor = new AnyYMDPatternExtractor(
pD(), pp("."), pM(), pp("."), pY4());
extractors.add(slavicY4MDPatternExtractor);
AnyYMDPatternExtractor coldEuropeY4MDPatternExtractor = new AnyYMDPatternExtractor(
pY4(), pp("-"), pM(), pp("-"), pD()); // yyyy-MM-dd
extractors.add(coldEuropeY4MDPatternExtractor);
AnyYMDPatternExtractor espanaY4MDPatternExtractor = new AnyYMDPatternExtractor(
pY4(), pp("-"), pM(), pp("-"), pD()); // yyyy/MM/dd
extractors.add(espanaY4MDPatternExtractor);
AnyYMDPatternExtractor americanY2MDPatternExtractor = new AnyYMDPatternExtractor(
pM(), pp("/"), pD(), pp("/"), pY4()); // MM/dd/yy
extractors.add(americanY2MDPatternExtractor);
AnyYMDPatternExtractor indianY2MDPatternExtractor = new AnyYMDPatternExtractor(
pD(), pp("-"), pM(), pp("-"), pY2()); // d-M-yy
extractors.add(indianY2MDPatternExtractor);
AnyYMDPatternExtractor chineseY2MDPatternExtractor = new AnyYMDPatternExtractor(
pY2(), pp("/"), pM(), pp("/"), pD()); // yyyy/M/d
extractors.add(chineseY2MDPatternExtractor);
AnyYMDPatternExtractor englishY2MDPatternExtractor = new AnyYMDPatternExtractor(
pD(), pp("/"), pM(), pp("/"), pY2()); // d/M/yy
extractors.add(englishY2MDPatternExtractor);
AnyYMDPatternExtractor slavicY2MDPatternExtractor = new AnyYMDPatternExtractor(
pD(), pp("."), pM(), pp("."), pY2());
extractors.add(slavicY2MDPatternExtractor);
AnyYMDPatternExtractor coldEuropeY2MDPatternExtractor = new AnyYMDPatternExtractor(
pY2(), pp("-"), pM(), pp("-"), pD()); // yy-MM-dd
extractors.add(coldEuropeY2MDPatternExtractor);
AnyYMDPatternExtractor espanaY2MDPatternExtractor = new AnyYMDPatternExtractor(
pY2(), pp("-"), pM(), pp("-"), pD()); // yy/MM/dd
extractors.add(espanaY2MDPatternExtractor);
extractors.add(new AnyYMDPatternExtractor("indianY4MDPatternExtractor",
pD(), pp("-"), pM(), pp("-"), pY4())); // d-M-yyyy
extractors.add(new AnyYMDPatternExtractor("chineseY4MDPatternExtractor",
pY4(), pp("/"), pM(), pp("/"), pD())); // yyyy/M/d
extractors.add(new AnyYMDPatternExtractor("englishY4MDPatternExtractor",
pD(), pp("/"), pM(), pp("/"), pY4())); // d/M/yyyy
extractors.add(new AnyYMDPatternExtractor("slavicY4MDPatternExtractor",
pD(), pp("."), pM(), pp("."), pY4())); // dd.MM.yyyy
// NOTE(review): this entry passes the same rule sequence (pY4, "-", pM, "-", pD) as the
// iSO8601Y4MDPatternExtractor entry registered first - confirm it is not redundant.
extractors.add(new AnyYMDPatternExtractor("coldEuropeY4MDPatternExtractor",
pY4(), pp("-"), pM(), pp("-"), pD())); // yyyy-MM-dd
// NOTE(review): the trailing comment names a slash-separated layout, but the separators
// passed are pp("-"), which makes this entry identical to the two year-first dash entries
// above - confirm which separator is intended.
extractors.add(new AnyYMDPatternExtractor("espanaY4MDPatternExtractor",
pY4(), pp("-"), pM(), pp("-"), pD())); // yyyy/MM/dd

extractors.add(new AnyYMDPatternExtractor("americanY2MDPatternExtractor",
pM(), pp("/"), pD(), pp("/"), pY2())); // MM/dd/yy
extractors.add(new AnyYMDPatternExtractor("indianY2MDPatternExtractor",
pD(), pp("-"), pM(), pp("-"), pY2())); // d-M-yy
extractors.add(new AnyYMDPatternExtractor("chineseY2MDPatternExtractor",
pY2(), pp("/"), pM(), pp("/"), pD())); // yy/M/d
extractors.add(new AnyYMDPatternExtractor("englishY2MDPatternExtractor",
pD(), pp("/"), pM(), pp("/"), pY2())); // d/M/yy
extractors.add(new AnyYMDPatternExtractor("slavicY2MDPatternExtractor",
pD(), pp("."), pM(), pp("."), pY2())); // dd.MM.yy
extractors.add(new AnyYMDPatternExtractor("coldEuropeY2MDPatternExtractor",
pY2(), pp("-"), pM(), pp("-"), pD())); // yy-MM-dd
// NOTE(review): the trailing comment names a slash-separated layout, but the separators
// passed are pp("-"), the same rule sequence as coldEuropeY2MDPatternExtractor just above -
// confirm which separator is intended.
extractors.add(new AnyYMDPatternExtractor("espanaY2MDPatternExtractor",
pY2(), pp("-"), pM(), pp("-"), pD())); // yy/MM/dd
}

@Override
Expand All @@ -71,6 +58,6 @@ public PartExtractorResult extract(String source) {
return result;
}
}
return new PartExtractorResult();
return new PartExtractorResult("AllYMDPatternExtractor");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,10 @@ static RulePart pM() {
/**
 * Builds the rule part of type {@link RuleType#D}.
 *
 * @return a new {@code RulePart} created with {@code RuleType.D} and a null {@code Integer}
 *     as its second constructor argument
 */
static RulePart pD() {
    // Declared as Integer so the call resolves exactly as it did with the explicit cast.
    final Integer unset = null;
    final RulePart rule = new RulePart(RuleType.D, unset);
    return rule;
}

private final RulePart[] partsInOrder;
/**
 * Creates an extractor for one ordered sequence of rule parts.
 *
 * <p>The regular expression handed to the superclass is derived from the rule parts via
 * {@code reconstructPattern}; no single formatter pattern is supplied ({@code null}).
 *
 * @param name name of this extractor, forwarded to the superclass
 * @param partsInOrder the rule parts in the order they are expected to appear; the array
 *     reference is kept as-is
 */
protected AnyYMDPatternExtractor(RulePart... partsInOrder) {
super(reconstructPattern(partsInOrder), null);
protected AnyYMDPatternExtractor(String name, RulePart... partsInOrder) {
super(name, reconstructPattern(partsInOrder), null);
this.partsInOrder = partsInOrder;
}

Expand Down Expand Up @@ -80,14 +81,14 @@ private static String reconstructPattern(RulePart[] partsInOrder) {
/**
 * Searches {@code source} for the first occurrence of this extractor's pattern.
 *
 * @param source the text to search
 * @return a result tagged with this extractor's name; when the pattern is found it is marked
 *     as found and carries the start of the first named group, the end of the last named
 *     group and the formatter patterns computed from the match, otherwise only the name is set
 */
public PartExtractorResult extract(String source) {
Matcher matcher = pattern.matcher(source);
// find() looks for a match anywhere in the input rather than requiring the whole input to match.
if (matcher.find()) {
PartExtractorResult result = new PartExtractorResult();
PartExtractorResult result = new PartExtractorResult(name);
result.found = true;
// The reported span runs from the first named group to the last one.
result.start = matcher.start(findFirstGroupName());
result.end = matcher.end(findLastGroupName());
result.formatterPatterns = getPatterns(matcher);
return result;
}
return new PartExtractorResult();
return new PartExtractorResult(name);
}

private String findLastGroupName() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@

abstract class EnumExtractor extends PartExtractorDelegate {

/**
 * Builds the delegate extractor that matches any one of the locale-specific enum values.
 *
 * <p>A {@code null} or {@link Locale#ROOT} locale is replaced by {@link Locale#US}. The values
 * returned by {@code getEnumValues} are filtered through {@code withoutNulls}, joined into a
 * regex alternation and wrapped so that the captured value is bounded by the start or end of
 * the input or by a character that is neither a word character nor an underscore.
 *
 * <p>NOTE(review): {@code getEnumValues} is an overridable method called from this constructor,
 * so it runs before the subclass constructor body - confirm overrides do not rely on subclass
 * state.
 *
 * @param name name given to the delegate extractor
 * @param locale locale whose values are matched; {@code null} and {@code Locale.ROOT} fall back
 *     to {@code Locale.US}
 * @param formatterPattern formatter pattern handed to the delegate extractor
 */
public EnumExtractor(Locale locale, String formatterPattern) {
public EnumExtractor(String name, Locale locale, String formatterPattern) {
if (locale == null || Locale.ROOT.equals(locale)) {
locale = Locale.US;
}
String[] values = withoutNulls(getEnumValues(locale), locale);
// NOTE(review): the values are inserted into the regex without quoting, so a value
// containing regex metacharacters (for example a trailing dot) is interpreted as regex syntax - confirm this is acceptable.
String valuesPattern = String.join("|", values);
super.delegate = new RegexPartExtractor("(?:^|.*?[^\\w_])(" + valuesPattern + ")(?:$|[^\\w_].*?)",
super.delegate = new RegexPartExtractor(name,"(?:^|.*?[^\\w_])(" + valuesPattern + ")(?:$|[^\\w_].*?)",
formatterPattern);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public enum Extractors {
private final Map<Locale, PartExtractor> extractors = new HashMap<>();
@Override
public PartExtractor get(Locale locale) {
return extractors.computeIfAbsent(locale, l -> new EnumExtractor(locale, "EEEE") {
return extractors.computeIfAbsent(locale, l -> new EnumExtractor("fullWeekdaysExtractor", locale, "EEEE") {
@Override
protected String[] getEnumValues(Locale locale) {
return new DateFormatSymbols(locale).getWeekdays();
Expand All @@ -23,7 +23,7 @@ protected String[] getEnumValues(Locale locale) {
private final Map<Locale, PartExtractor> extractors = new HashMap<>();
@Override
public PartExtractor get(Locale locale) {
return extractors.computeIfAbsent(locale, l -> new EnumExtractor(locale, "EEE") {
return extractors.computeIfAbsent(locale, l -> new EnumExtractor("shortWeekdaysExtractor", locale, "EEE") {
@Override
protected String[] getEnumValues(Locale locale) {
return new DateFormatSymbols(locale).getShortWeekdays();
Expand All @@ -46,7 +46,7 @@ public PartExtractor get(Locale locale) {
}
},
plainYearExtractor{
private final PartExtractor partExtractor = new RegexPartExtractor(".*\\b?(\\d{4})\\b?.*", "yyyy");
private final PartExtractor partExtractor = new RegexPartExtractor("plainYearExtractor", ".*\\b?(\\d{4})\\b?.*", "yyyy");
@Override
public PartExtractor get(Locale locale) {
return partExtractor;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
class FullMonthExtractor extends EnumExtractor {

/**
 * Creates the extractor under the name {@code "FullMonthExtractor"} with the formatter
 * pattern {@code "MMMM"}.
 *
 * @param locale locale forwarded to the superclass constructor
 */
public FullMonthExtractor(Locale locale) {
super(locale, "MMMM");
super("FullMonthExtractor", locale, "MMMM");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@
import java.util.List;

public class PartExtractorResult {
public PartExtractorResult(){}
public PartExtractorResult(List<String> formatterPatterns){
this.formatterPatterns = formatterPatterns;

public final String extractorName;

/**
 * Creates a result tagged with the extractor's name and an empty list of formatter patterns.
 *
 * @param extractorName name of the extractor that produced this result
 */
public PartExtractorResult(String extractorName){
this.extractorName = extractorName;
this.formatterPatterns = new ArrayList<>();
}
/**
 * Creates a result tagged with the extractor's name and carrying a single formatter pattern.
 *
 * @param extractorName name of the extractor that produced this result
 * @param formatterPattern the only entry placed in the new formatter-pattern list; it is
 *     added as given, with no null check
 */
public PartExtractorResult(String formatterPattern){
public PartExtractorResult(String extractorName, String formatterPattern){
this.extractorName = extractorName;
this.formatterPatterns = new ArrayList<>();
this.formatterPatterns.add(formatterPattern);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@

class RegexPartExtractor implements PartExtractor {

protected final String name;
protected final Pattern pattern;
protected final String formatterPattern;

/**
 * Creates an extractor backed by a case-insensitive regular expression.
 *
 * @param name name of this extractor
 * @param regex regular expression to compile; compiled with {@link Pattern#CASE_INSENSITIVE}
 * @param formatterPattern formatter pattern stored for this extractor; some visible subclasses
 *     pass {@code null}
 */
public RegexPartExtractor(String regex, String formatterPattern) {
public RegexPartExtractor(String name, String regex, String formatterPattern) {
this.name = name;
this.pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
this.formatterPattern = formatterPattern;
}
Expand All @@ -18,12 +20,12 @@ public RegexPartExtractor(String regex, String formatterPattern) {
/**
 * Searches {@code source} for the first occurrence of this extractor's pattern.
 *
 * @param source the text to search
 * @return a result tagged with this extractor's name; when the pattern is found it is marked
 *     as found, carries this extractor's formatter pattern and the start and end offsets of
 *     capturing group 1, otherwise only the name is set
 */
public PartExtractorResult extract(String source) {
Matcher matcher = pattern.matcher(source);
if (matcher.find()) {
PartExtractorResult result = new PartExtractorResult(formatterPattern);
PartExtractorResult result = new PartExtractorResult(name, formatterPattern);
result.found = true;
// Offsets come from capturing group 1, not from the whole match.
result.start = matcher.start(1);
result.end = matcher.end(1);
return result;
}
return new PartExtractorResult();
return new PartExtractorResult(name);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
class RegularTimeExtractor extends RegexPartExtractor {

public RegularTimeExtractor() {
super("(?:^|.*?\\D)"
super("RegularTimeExtractor", "(?:^|.*?\\D)"
+ "("
+ "(?<hours>(\\d|0\\d|1\\d|2[0-3]))"
+ ":"
Expand All @@ -23,7 +23,7 @@ public RegularTimeExtractor() {
public PartExtractorResult extract(String source) {
Matcher m = pattern.matcher(source);
if (m.matches()) {
PartExtractorResult r = new PartExtractorResult();
PartExtractorResult r = new PartExtractorResult("RegularTimeExtractor");
r.found = true;

String ampmPart = "";
Expand Down Expand Up @@ -70,7 +70,7 @@ public PartExtractorResult extract(String source) {
r.formatterPatterns = newList(resPattern);
return r;
}
return new PartExtractorResult();
return new PartExtractorResult("RegularTimeExtractor");
}

static String repeat(String key, int count) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
class ShortMonthExtractor extends EnumExtractor {

/**
 * Creates the extractor under the name {@code "ShortMonthExtractor"} with the formatter
 * pattern {@code "MMM"}.
 *
 * @param locale locale forwarded to the superclass constructor
 */
public ShortMonthExtractor(Locale locale) {
super(locale, "MMM");
super("ShortMonthExtractor", locale, "MMM");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
class YearWithEra extends RegexPartExtractor {

/**
 * Creates the extractor under the name {@code "YearWithEra"}.
 *
 * <p>The regular expression defines three named groups: {@code year} (one or more digits),
 * {@code eraSeparator} (optional whitespace) and {@code era} (one of the listed era
 * designators). No fixed formatter pattern is passed to the superclass ({@code null}).
 */
public YearWithEra() {
super("(?:^|.*?\\D)(?<year>\\d+)(?<eraSeparator>\\s*)(?<era>AD|BC|Anno Domini|Before Christ)(?:$|\\D.*?)",
super("YearWithEra", "(?:^|.*?\\D)(?<year>\\d+)(?<eraSeparator>\\s*)(?<era>AD|BC|Anno Domini|Before Christ)(?:$|\\D.*?)",
null);
}

@Override
public PartExtractorResult extract(String source) {
Matcher matcher = pattern.matcher(source);
if (matcher.find()) {
PartExtractorResult result = new PartExtractorResult();
PartExtractorResult result = new PartExtractorResult("YearWithEra");
result.found = true;
result.start = matcher.start("year");
String resPattern = repeat("y", matcher.group("year").length());
Expand All @@ -38,6 +38,6 @@ public PartExtractorResult extract(String source) {
result.formatterPatterns = newList(resPattern);
return result;
}
return new PartExtractorResult();
return new PartExtractorResult("YearWithEra");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -102,13 +102,13 @@ public static Collection<Object[]> data() {
{null, "2024-1-5 08:15 ", "yyyy-M-d HH:mm "},
{null, "2024-12-25 14:45 ", "yyyy-MM-dd HH:mm "},
{null, "2024-12-25 14:45:30 ", "yyyy-MM-dd HH:mm:ss "},
{null, "1/1/23 ", "d/M/yy "},
{null, "1/1/2023 ", "d/M/yyyy "},
{null, "01/01/23 ", "dd/MM/yy "},
{null, "01/01/2023 ", "dd/MM/yyyy "},
{null, "1/1/23 12:34 ", "d/M/yy HH:mm "},
{null, "1/1/2023 12:34 ", "d/M/yyyy HH:mm "},
{null, "01/01/23 12:34 ", "dd/MM/yy HH:mm "},
{null, "1/1/23 ", "M/d/yy "},
{null, "1/1/2023 ", "M/d/yyyy "},
{null, "01/01/23 ", "MM/dd/yy "},
{null, "01/01/2023 ", "MM/dd/yyyy "},
{null, "1/1/23 12:34 ", "M/d/yy HH:mm "},
{null, "1/1/2023 12:34 ", "M/d/yyyy HH:mm "},
{null, "01/01/23 12:34 ", "MM/dd/yy HH:mm "},

});
}
Expand Down

0 comments on commit 957e039

Please sign in to comment.