diff --git a/PCRE2_API.md b/PCRE2_API.md
index 34faa33..fcc1fa3 100644
--- a/PCRE2_API.md
+++ b/PCRE2_API.md
@@ -59,7 +59,7 @@ Here's the list of the PCRE2 API functions exposed via `org.pcre4j.api.IPcre2` a
 | | [pcre2_set_heap_limit](https://www.pcre.org/current/doc/html/pcre2_set_heap_limit.html) | Set the match backtracking heap limit |
 | | [pcre2_set_match_limit](https://www.pcre.org/current/doc/html/pcre2_set_match_limit.html) | Set the match limit |
 | | [pcre2_set_max_pattern_length](https://www.pcre.org/current/doc/html/pcre2_set_max_pattern_length.html) | Set the maximum length of pattern |
-| | [pcre2_set_newline](https://www.pcre.org/current/doc/html/pcre2_set_newline.html) | Set the newline convention |
+| ✅ | [pcre2_set_newline](https://www.pcre.org/current/doc/html/pcre2_set_newline.html) | Set the newline convention |
 | | [pcre2_set_offset_limit](https://www.pcre.org/current/doc/html/pcre2_set_offset_limit.html) | Set the offset limit |
 | | [pcre2_set_parens_nest_limit](https://www.pcre.org/current/doc/html/pcre2_set_parens_nest_limit.html) | Set the parentheses nesting limit |
 | | [pcre2_set_recursion_limit](https://www.pcre.org/current/doc/html/pcre2_set_recursion_limit.html) | Obsolete: use pcre2_set_depth_limit |
diff --git a/api/src/main/java/org/pcre4j/api/IPcre2.java b/api/src/main/java/org/pcre4j/api/IPcre2.java
index 29eec27..d007df3 100644
--- a/api/src/main/java/org/pcre4j/api/IPcre2.java
+++ b/api/src/main/java/org/pcre4j/api/IPcre2.java
@@ -265,11 +265,34 @@ public interface IPcre2 {
     public static final int CONVERT_GLOB_NO_WILD_SEPARATOR = 0x00000030;
     public static final int CONVERT_GLOB_NO_STARSTAR = 0x00000050;
 
+    /**
+     * Carriage return only (\r)
+     */
     public static final int NEWLINE_CR = 1;
+
+    /**
+     * Linefeed only (\n)
+     */
     public static final int NEWLINE_LF = 2;
+
+    /**
+     * CR followed by LF only (\r\n)
+     */
     public static final int NEWLINE_CRLF = 3;
+
+    /**
+     * Any Unicode newline sequence
+     */
     public static final int NEWLINE_ANY = 4;
+
+    /**
+     * Any of {@link #NEWLINE_CR}, {@link #NEWLINE_LF}, or {@link #NEWLINE_CRLF}
+     */
     public static final int NEWLINE_ANYCRLF = 5;
+
+    /**
+     * NUL character (\0)
+     */
     public static final int NEWLINE_NUL = 6;
 
     /**
@@ -924,4 +947,13 @@ public interface IPcre2 {
      * @param ovector the array to store the output vector
      */
     public void getOvector(long matchData, long[] ovector);
+
+    /**
+     * Set the newline convention within a compile context
+     *
+     * @param ccontext the compile context handle
+     * @param newline the newline convention, one of the {@code NEWLINE_*} constants
+     * @return 0 on success, otherwise a negative error code
+     */
+    public int setNewline(long ccontext, int newline);
 }
diff --git a/ffm/src/main/java/org/pcre4j/ffm/Pcre2.java b/ffm/src/main/java/org/pcre4j/ffm/Pcre2.java
index f5cc9e8..4b59951 100644
--- a/ffm/src/main/java/org/pcre4j/ffm/Pcre2.java
+++ b/ffm/src/main/java/org/pcre4j/ffm/Pcre2.java
@@ -61,6 +61,8 @@ public class Pcre2 implements IPcre2 {
     private final MethodHandle pcre2_get_ovector_count;
     private final MethodHandle pcre2_get_ovector_pointer;
 
+    private final MethodHandle pcre2_set_newline;
+
     /**
      * Constructs a new PCRE2 API using the common library name "pcre2-8".
      */
@@ -272,6 +274,14 @@ public Pcre2(String library, String suffix) {
                         ValueLayout.ADDRESS // pcre2_match_data*
                 )
         );
+
+        pcre2_set_newline = LINKER.downcallHandle(
+                SYMBOL_LOOKUP.find("pcre2_set_newline" + suffix).orElseThrow(),
+                FunctionDescriptor.of(ValueLayout.JAVA_INT, // int
+                        ValueLayout.ADDRESS, // pcre2_compile_context*
+                        ValueLayout.JAVA_INT // int
+                )
+        );
     }
 
     @Override
@@ -783,4 +793,18 @@ public void getOvector(long matchData, long[] ovector) {
             throw new RuntimeException(e);
         }
     }
+
+    @Override
+    public int setNewline(long ccontext, int newline) {
+        try {
+            final var pCContext = MemorySegment.ofAddress(ccontext);
+
+            return (int) pcre2_set_newline.invokeExact(
+                    pCContext,
+                    newline
+            );
+        } catch (Throwable e) {
+            throw new RuntimeException(e);
+        }
+    }
 }
diff --git a/jna/src/main/java/org/pcre4j/jna/Pcre2.java b/jna/src/main/java/org/pcre4j/jna/Pcre2.java
index a4e693f..36eb0ac 100644
--- a/jna/src/main/java/org/pcre4j/jna/Pcre2.java
+++ b/jna/src/main/java/org/pcre4j/jna/Pcre2.java
@@ -330,6 +330,11 @@ public void getOvector(long matchData, long[] ovector) {
         pOvector.read(0, ovector, 0, ovector.length);
     }
 
+    @Override
+    public int setNewline(long ccontext, int newline) {
+        return library.pcre2_set_newline(new Pointer(ccontext), newline);
+    }
+
     private interface Library extends com.sun.jna.Library {
 
         int pcre2_config(int what, Pointer where);
@@ -397,6 +402,8 @@ int pcre2_match(
         int pcre2_get_ovector_count(Pointer matchData);
 
         Pointer pcre2_get_ovector_pointer(Pointer matchData);
+
+        int pcre2_set_newline(Pointer ccontext, int value);
     }
 
     private record SuffixFunctionMapper(String suffix) implements FunctionMapper {
diff --git a/lib/src/main/java/org/pcre4j/Pcre2CompileContext.java b/lib/src/main/java/org/pcre4j/Pcre2CompileContext.java
index 17404e3..6b60c91 100644
--- a/lib/src/main/java/org/pcre4j/Pcre2CompileContext.java
+++ b/lib/src/main/java/org/pcre4j/Pcre2CompileContext.java
@@ -87,6 +87,24 @@ public long handle() {
         return handle;
     }
 
+    /**
+     * Set the newline convention
+     *
+     * @param newline the newline convention
+     * @throws IllegalArgumentException if {@code newline} is null
+     * @throws RuntimeException if the underlying API call returns a non-zero error code
+     */
+    public void setNewline(Pcre2Newline newline) {
+        if (newline == null) {
+            throw new IllegalArgumentException("newline cannot be null");
+        }
+        final var result = api.setNewline(handle, newline.value());
+        if (result != 0) {
+            final var errorMessage = Pcre4jUtils.getErrorMessage(api, result);
+            throw new RuntimeException("Failed to set the newline convention", new IllegalStateException(errorMessage));
+        }
+    }
+
     private record Clean(IPcre2 api, long compileContext) implements Runnable {
         @Override
         public void run() {
diff --git a/lib/src/main/java/org/pcre4j/Pcre2Newline.java b/lib/src/main/java/org/pcre4j/Pcre2Newline.java
index 9905544..0d150b4 100644
--- a/lib/src/main/java/org/pcre4j/Pcre2Newline.java
+++ b/lib/src/main/java/org/pcre4j/Pcre2Newline.java
@@ -20,12 +20,35 @@
 import java.util.Optional;
 
 public enum Pcre2Newline {
-    NEWLINE_CR(IPcre2.NEWLINE_CR),
-    NEWLINE_LF(IPcre2.NEWLINE_LF),
-    NEWLINE_CRLF(IPcre2.NEWLINE_CRLF),
-    NEWLINE_ANY(IPcre2.NEWLINE_ANY),
-    NEWLINE_ANYCRLF(IPcre2.NEWLINE_ANYCRLF),
-    NEWLINE_NUL(IPcre2.NEWLINE_NUL);
+    /**
+     * Carriage return only (\r)
+     */
+    CR(IPcre2.NEWLINE_CR),
+
+    /**
+     * Linefeed only (\n)
+     */
+    LF(IPcre2.NEWLINE_LF),
+
+    /**
+     * CR followed by LF only (\r\n)
+     */
+    CRLF(IPcre2.NEWLINE_CRLF),
+
+    /**
+     * Any Unicode newline sequence
+     */
+    ANY(IPcre2.NEWLINE_ANY),
+
+    /**
+     * Any of {@link #CR}, {@link #LF}, or {@link #CRLF}
+     */
+    ANYCRLF(IPcre2.NEWLINE_ANYCRLF),
+
+    /**
+     * NUL character (\0)
+     */
+    NUL(IPcre2.NEWLINE_NUL);
 
     /**
      * The integer value
diff --git a/regex/src/main/java/org/pcre4j/regex/Pattern.java b/regex/src/main/java/org/pcre4j/regex/Pattern.java
index 8643a7a..b525a48 100644
--- a/regex/src/main/java/org/pcre4j/regex/Pattern.java
+++ b/regex/src/main/java/org/pcre4j/regex/Pattern.java
@@ -60,10 +60,14 @@ public class Pattern {
      */
     public static final int UNICODE_CHARACTER_CLASS = java.util.regex.Pattern.UNICODE_CHARACTER_CLASS;
 
+    /**
+     * A {@link java.util.regex.Pattern#UNIX_LINES}-compatible flag implemented via {@link org.pcre4j.Pcre2Newline#LF}
+     */
+    public static final int UNIX_LINES = java.util.regex.Pattern.UNIX_LINES;
+
     // TODO: public static final int CANON_EQ = java.util.regex.Pattern.CANON_EQ;
     // TODO: public static final int COMMENTS = java.util.regex.Pattern.COMMENTS;
     // TODO: public static final int UNICODE_CASE = java.util.regex.Pattern.UNICODE_CASE;
-    // TODO: public static final int UNIX_LINES = java.util.regex.Pattern.UNIX_LINES;
     /* package-private */ final Pcre2Code code;
     /* package-private */ final Pcre2Code matchingCode;
     /* package-private */ final Pcre2Code lookingAtCode;
@@ -108,6 +112,13 @@ private Pattern(IPcre2 api, String regex, int flags) {
             compileOptions.add(Pcre2CompileOption.UCP);
         }
 
+        final var compileContext = new Pcre2CompileContext(api, null);
+        if ((flags & UNIX_LINES) != 0) {
+            compileContext.setNewline(Pcre2Newline.LF);
+        } else {
+            compileContext.setNewline(Pcre2Newline.ANY);
+        }
+
         try {
             if (Pcre4jUtils.isJitSupported(api)) {
                 this.code = new Pcre2JitCode(
@@ -115,7 +126,7 @@ private Pattern(IPcre2 api, String regex, int flags) {
                         regex,
                         compileOptions,
                         EnumSet.of(Pcre2JitOption.COMPLETE),
-                        null
+                        compileContext
                 );
 
                 final var matchingCompileOptions = EnumSet.copyOf(compileOptions);
@@ -126,7 +137,7 @@ private Pattern(IPcre2 api, String regex, int flags) {
                         regex,
                         matchingCompileOptions,
                         EnumSet.of(Pcre2JitOption.COMPLETE),
-                        null
+                        compileContext
                 );
 
                 final var lookingAtCompileOptions = EnumSet.copyOf(compileOptions);
@@ -136,14 +147,14 @@ private Pattern(IPcre2 api, String regex, int flags) {
                         regex,
                         lookingAtCompileOptions,
                         EnumSet.of(Pcre2JitOption.COMPLETE),
-                        null
+                        compileContext
                 );
             } else {
                 this.code = new Pcre2Code(
                         api,
                         regex,
                         compileOptions,
-                        null
+                        compileContext
                 );
                 this.matchingCode = null;
                 this.lookingAtCode = null;
diff --git a/regex/src/test/java/org/pcre4j/regex/PatternTests.java b/regex/src/test/java/org/pcre4j/regex/PatternTests.java
index 27f471e..e8cd5d1 100644
--- a/regex/src/test/java/org/pcre4j/regex/PatternTests.java
+++ b/regex/src/test/java/org/pcre4j/regex/PatternTests.java
@@ -21,8 +21,9 @@
 
 import java.util.stream.Stream;
 
 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 
 /**
  * Tests to ensure API likeness of the {@link Pattern} to the {@link java.util.regex.Pattern}.
@@ -105,4 +106,43 @@ void withUnicodeCharacterClass(IPcre2 api) {
         assertEquals(javaMatcher.group(), pcre4jMatcher.group());
     }
 
+    @ParameterizedTest
+    @MethodSource("parameters")
+    void withoutUnixNewline(IPcre2 api) {
+        var regex = "^A$";
+        var input = "A\u0085B";
+        var javaMatcher = java.util.regex.Pattern.compile(
+                regex,
+                java.util.regex.Pattern.MULTILINE
+        ).matcher(input);
+        var pcre4jMatcher = Pattern.compile(
+                api,
+                regex,
+                Pattern.MULTILINE
+        ).matcher(input);
+
+        assertEquals(javaMatcher.find(), pcre4jMatcher.find());
+        assertEquals(javaMatcher.group(), pcre4jMatcher.group());
+    }
+
+    @ParameterizedTest
+    @MethodSource("parameters")
+    void withUnixNewline(IPcre2 api) {
+        var regex = "^A$";
+        var input = "A\u0085B";
+        var javaMatcher = java.util.regex.Pattern.compile(
+                regex,
+                java.util.regex.Pattern.MULTILINE | java.util.regex.Pattern.UNIX_LINES
+        ).matcher(input);
+        var pcre4jMatcher = Pattern.compile(
+                api,
+                regex,
+                Pattern.MULTILINE | Pattern.UNIX_LINES
+        ).matcher(input);
+
+        assertEquals(javaMatcher.find(), pcre4jMatcher.find());
+        assertThrows(IllegalStateException.class, javaMatcher::group);
+        assertThrows(IllegalStateException.class, pcre4jMatcher::group);
+    }
+
 }