From ac65bfee9aef33d67209097d78ec9a084a970bb3 Mon Sep 17 00:00:00 2001 From: Oleksii PELYKH Date: Tue, 25 Jun 2024 23:12:40 +0200 Subject: [PATCH] (feat) JIT stack --- PCRE2_API.md | 6 +- api/src/main/java/org/pcre4j/api/IPcre2.java | 28 ++++- ffm/src/main/java/org/pcre4j/ffm/Pcre2.java | 77 +++++++++++++ jna/src/main/java/org/pcre4j/jna/Pcre2.java | 22 ++++ .../main/java/org/pcre4j/Pcre2JitStack.java | 103 ++++++++++++++++++ .../java/org/pcre4j/Pcre2MatchContext.java | 15 +++ .../main/java/org/pcre4j/regex/Matcher.java | 35 ++++-- 7 files changed, 275 insertions(+), 11 deletions(-) create mode 100644 lib/src/main/java/org/pcre4j/Pcre2JitStack.java diff --git a/PCRE2_API.md b/PCRE2_API.md index fcc1fa3..687f0a8 100644 --- a/PCRE2_API.md +++ b/PCRE2_API.md @@ -30,9 +30,9 @@ Here's the list of the PCRE2 API functions exposed via `org.pcre4j.api.IPcre2` a | ✅ | [pcre2_jit_compile](https://www.pcre.org/current/doc/html/pcre2_jit_compile.html) | Process a compiled pattern with the JIT compiler | | | [pcre2_jit_free_unused_memory](https://www.pcre.org/current/doc/html/pcre2_jit_free_unused_memory.html) | Free unused JIT memory | | ✅ | [pcre2_jit_match](https://www.pcre.org/current/doc/html/pcre2_jit_match.html) | Fast path interface to JIT matching | -| | [pcre2_jit_stack_assign](https://www.pcre.org/current/doc/html/pcre2_jit_stack_assign.html) | Assign stack for JIT matching | -| | [pcre2_jit_stack_create](https://www.pcre.org/current/doc/html/pcre2_jit_stack_create.html) | Create a stack for JIT matching | -| | [pcre2_jit_stack_free](https://www.pcre.org/current/doc/html/pcre2_jit_stack_free.html) | Free a JIT matching stack | +| ✅ | [pcre2_jit_stack_assign](https://www.pcre.org/current/doc/html/pcre2_jit_stack_assign.html) | Assign stack for JIT matching | +| ✅ | [pcre2_jit_stack_create](https://www.pcre.org/current/doc/html/pcre2_jit_stack_create.html) | Create a stack for JIT matching | +| ✅ | 
[pcre2_jit_stack_free](https://www.pcre.org/current/doc/html/pcre2_jit_stack_free.html) | Free a JIT matching stack | | | [pcre2_maketables](https://www.pcre.org/current/doc/html/pcre2_maketables.html) | Build character tables in current locale | | | [pcre2_maketables_free](https://www.pcre.org/current/doc/html/pcre2_maketables_free.html) | Free character tables | | ✅ | [pcre2_match](https://www.pcre.org/current/doc/html/pcre2_match.html) | Match a compiled pattern to a subject string (Perl compatible) | diff --git a/api/src/main/java/org/pcre4j/api/IPcre2.java b/api/src/main/java/org/pcre4j/api/IPcre2.java index d007df3..8b1582b 100644 --- a/api/src/main/java/org/pcre4j/api/IPcre2.java +++ b/api/src/main/java/org/pcre4j/api/IPcre2.java @@ -870,6 +870,32 @@ public interface IPcre2 { */ public int jitMatch(long code, String subject, int startoffset, int options, long matchData, long mcontext); + /** + * Create a JIT stack. + * + * @param startsize the initial stack size + * @param maxsize the maximum stack size + * @param gcontext the general context handle or 0 + * @return the JIT stack handle, or 0 if the stack could not be created + */ + public long jitStackCreate(long startsize, long maxsize, long gcontext); + + /** + * Free a JIT stack. + * + * @param jitStack the JIT stack handle + */ + public void jitStackFree(long jitStack); + + /** + * Assign the JIT stack to a match context. + * + * @param mcontext the match context handle + * @param callback a callback function handle or 0 + * @param data a JIT stack handle or a value to be passed to the callback function + */ + public void jitStackAssign(long mcontext, long callback, long data); + /** * Create a new match data block.
* @@ -952,7 +978,7 @@ public interface IPcre2 { * Set the newline convention within a compile context * * @param ccontext the compile context handle - * @param newline the newline convention + * @param newline the newline convention * @return 0 on success, otherwise a negative error code */ public int setNewline(long ccontext, int newline); diff --git a/ffm/src/main/java/org/pcre4j/ffm/Pcre2.java b/ffm/src/main/java/org/pcre4j/ffm/Pcre2.java index 4b59951..c09ed15 100644 --- a/ffm/src/main/java/org/pcre4j/ffm/Pcre2.java +++ b/ffm/src/main/java/org/pcre4j/ffm/Pcre2.java @@ -47,6 +47,9 @@ public class Pcre2 implements IPcre2 { private final MethodHandle pcre2_jit_compile; private final MethodHandle pcre2_jit_match; + private final MethodHandle pcre2_jit_stack_create; + private final MethodHandle pcre2_jit_stack_free; + private final MethodHandle pcre2_jit_stack_assign; private final MethodHandle pcre2_match_data_create; private final MethodHandle pcre2_match_data_create_from_pattern; @@ -204,6 +207,31 @@ public Pcre2(String library, String suffix) { ) ); + pcre2_jit_stack_create = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_jit_stack_create" + suffix).orElseThrow(), + FunctionDescriptor.of(ValueLayout.ADDRESS, // pcre2_jit_stack* + ValueLayout.ADDRESS, // PCRE2_SIZE + ValueLayout.ADDRESS, // PCRE2_SIZE + ValueLayout.ADDRESS // pcre2_general_context* + ) + ); + + pcre2_jit_stack_free = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_jit_stack_free" + suffix).orElseThrow(), + FunctionDescriptor.ofVoid( + ValueLayout.ADDRESS // pcre2_jit_stack* + ) + ); + + pcre2_jit_stack_assign = LINKER.downcallHandle( + SYMBOL_LOOKUP.find("pcre2_jit_stack_assign" + suffix).orElseThrow(), + FunctionDescriptor.ofVoid( + ValueLayout.ADDRESS, // pcre2_match_context* + ValueLayout.ADDRESS, // pcre2_jit_callback + ValueLayout.ADDRESS // void* + ) + ); + pcre2_match_data_create = LINKER.downcallHandle( SYMBOL_LOOKUP.find("pcre2_match_data_create" + suffix).orElseThrow(),
FunctionDescriptor.of(ValueLayout.ADDRESS, // pcre2_match_data* @@ -645,6 +673,55 @@ public int jitMatch(long code, String subject, int startoffset, int options, lon } } + @Override + public long jitStackCreate(long startsize, long maxsize, long gcontext) { + try (var arena = Arena.ofConfined()) { + final var startSize = MemorySegment.ofAddress(startsize); + final var maxSize = MemorySegment.ofAddress(maxsize); + final var pGContext = MemorySegment.ofAddress(gcontext); + + final var pJitStack = (MemorySegment) pcre2_jit_stack_create.invokeExact( + startSize, + maxSize, + pGContext + ); + + return pJitStack.address(); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public void jitStackFree(long jitStack) { + try (var arena = Arena.ofConfined()) { + final var pJitStack = MemorySegment.ofAddress(jitStack); + + pcre2_jit_stack_free.invokeExact( + pJitStack + ); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Override + public void jitStackAssign(long mcontext, long callback, long data) { + try (var arena = Arena.ofConfined()) { + final var pMContext = MemorySegment.ofAddress(mcontext); + final var pCallback = MemorySegment.ofAddress(callback); + final var pData = MemorySegment.ofAddress(data); + + pcre2_jit_stack_assign.invokeExact( + pMContext, + pCallback, + pData + ); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + @Override public long matchDataCreate(int ovecsize, long gcontext) { try (var arena = Arena.ofConfined()) { diff --git a/jna/src/main/java/org/pcre4j/jna/Pcre2.java b/jna/src/main/java/org/pcre4j/jna/Pcre2.java index 36eb0ac..f0550b5 100644 --- a/jna/src/main/java/org/pcre4j/jna/Pcre2.java +++ b/jna/src/main/java/org/pcre4j/jna/Pcre2.java @@ -262,6 +262,22 @@ public int jitMatch(long code, String subject, int startoffset, int options, lon ); } + @Override + public long jitStackCreate(long startSize, long maxSize, long gcontext) { + final var jitStack = 
library.pcre2_jit_stack_create(startSize, maxSize, new Pointer(gcontext)); + return Pointer.nativeValue(jitStack); + } + + @Override + public void jitStackFree(long stack) { + library.pcre2_jit_stack_free(new Pointer(stack)); + } + + @Override + public void jitStackAssign(long mcontext, long callback, long data) { + library.pcre2_jit_stack_assign(new Pointer(mcontext), new Pointer(callback), new Pointer(data)); + } + @Override public long matchDataCreate(int ovecsize, long gcontext) { Pointer matchData = library.pcre2_match_data_create(ovecsize, new Pointer(gcontext)); @@ -377,6 +393,12 @@ int pcre2_jit_match( Pointer mcontext ); + Pointer pcre2_jit_stack_create(long startSize, long maxSize, Pointer gcontext); + + void pcre2_jit_stack_free(Pointer stack); + + void pcre2_jit_stack_assign(Pointer mcontext, Pointer callback, Pointer data); + Pointer pcre2_match_data_create(int ovecsize, Pointer gcontext); Pointer pcre2_match_data_create_from_pattern(Pointer code, Pointer gcontext); diff --git a/lib/src/main/java/org/pcre4j/Pcre2JitStack.java b/lib/src/main/java/org/pcre4j/Pcre2JitStack.java new file mode 100644 index 0000000..15097e7 --- /dev/null +++ b/lib/src/main/java/org/pcre4j/Pcre2JitStack.java @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2024 Oleksii PELYKH + * + * This file is a part of the PCRE4J. The PCRE4J is free software: you can redistribute it and/or modify it under the + * terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License along with this program. If not, see + * . 
+ */ +package org.pcre4j; + +import org.pcre4j.api.IPcre2; + +import java.lang.ref.Cleaner; + +public class Pcre2JitStack { + + private static final Cleaner cleaner = Cleaner.create(); + + /** + * The JIT stack handle + */ + /* package-private */ final long handle; + + /** + * The PCRE2 API reference to use across the entire lifecycle of the object + */ + /* package-private */ final IPcre2 api; + + /** + * The cleaner to free the resources + */ + private final Cleaner.Cleanable cleanable; + + /** + * Create a new JIT stack + * + * @param startSize the initial size of the JIT stack + * @param maxSize the maximum size of the JIT stack + * @param generalContext the general context to use or {@code null} to use the default context + */ + public Pcre2JitStack(long startSize, long maxSize, Pcre2GeneralContext generalContext) { + this(Pcre4j.api(), startSize, maxSize, generalContext); + } + + /** + * Create a new JIT stack + * + * @param api the PCRE2 API to use + * @param startSize the initial size of the JIT stack + * @param maxSize the maximum size of the JIT stack + * @param generalContext the general context to use or {@code null} to use the default context + */ + public Pcre2JitStack(IPcre2 api, long startSize, long maxSize, Pcre2GeneralContext generalContext) { + if (api == null) { + throw new IllegalArgumentException("api cannot be null"); + } + + final var handle = api.jitStackCreate( + startSize, + maxSize, + generalContext != null ? 
generalContext.handle : 0 + ); + if (handle == 0) { + throw new IllegalStateException("Failed to create JIT stack"); + } + + this.api = api; + this.handle = handle; + this.cleanable = cleaner.register(this, new Pcre2JitStack.Clean(api, handle)); + } + + /** + * Get the PCRE2 API backing this JIT stack + * + * @return the PCRE2 API + */ + public IPcre2 api() { + return api; + } + + /** + * Get the handle of the JIT stack + * + * @return the handle of the JIT stack + */ + public long handle() { + return handle; + } + + private record Clean(IPcre2 api, long jitStack) implements Runnable { + @Override + public void run() { + api.jitStackFree(jitStack); + } + } + +} diff --git a/lib/src/main/java/org/pcre4j/Pcre2MatchContext.java b/lib/src/main/java/org/pcre4j/Pcre2MatchContext.java index e26c765..3141918 100644 --- a/lib/src/main/java/org/pcre4j/Pcre2MatchContext.java +++ b/lib/src/main/java/org/pcre4j/Pcre2MatchContext.java @@ -21,14 +21,17 @@ public class Pcre2MatchContext { private static final Cleaner cleaner = Cleaner.create(); + /** * The match context handle */ /* package-private */ final long handle; + /** * The PCRE2 API reference to use across the entire lifecycle of the object */ /* package-private */ final IPcre2 api; + /** * The cleaner to free the resources */ @@ -84,6 +87,18 @@ public long handle() { return handle; } + /** + * Assign a JIT stack to the match context + * + * @param jitStack the JIT stack to assign (not retained here; keep it reachable while matching) + */ + public void assignJitStack(Pcre2JitStack jitStack) { + if (jitStack == null) { + throw new IllegalArgumentException("jitStack must not be null"); + } + api.jitStackAssign(handle, 0, jitStack.handle); + } + private record Clean(IPcre2 api, long matchContext) implements Runnable { @Override public void run() { diff --git a/regex/src/main/java/org/pcre4j/regex/Matcher.java b/regex/src/main/java/org/pcre4j/regex/Matcher.java index ee9bc49..adf32f7 100644 --- a/regex/src/main/java/org/pcre4j/regex/Matcher.java +++
b/regex/src/main/java/org/pcre4j/regex/Matcher.java @@ -14,10 +14,7 @@ */ package org.pcre4j.regex; -import org.pcre4j.Pcre2Code; -import org.pcre4j.Pcre2MatchData; -import org.pcre4j.Pcre2MatchOption; -import org.pcre4j.Pcre4jUtils; +import org.pcre4j.*; import org.pcre4j.api.IPcre2; import java.nio.charset.StandardCharsets; @@ -31,11 +28,24 @@ */ public class Matcher implements java.util.regex.MatchResult { + private final static long JIT_STACK_START_SIZE = 32 * 1024; + private final static long JIT_STACK_MAX_SIZE = 512 * 1024; + /** * The pattern that this matcher used to match the input against */ private Pattern pattern; + /** + * The match context that this matcher uses to match against the pattern + */ + private Pcre2MatchContext matchContext; + + /** + * The JIT stack that this matcher uses to match against the pattern + */ + private Pcre2JitStack jitStack; + /** * A map of group names to group indices */ @@ -73,9 +83,14 @@ public class Matcher implements java.util.regex.MatchResult { /* package-private */ Matcher(Pattern pattern, CharSequence input) { this.pattern = pattern; + this.matchContext = new Pcre2MatchContext(pattern.code.api(), null); + this.jitStack = new Pcre2JitStack(pattern.code.api(), JIT_STACK_START_SIZE, JIT_STACK_MAX_SIZE, null); + this.matchContext.assignJitStack(jitStack); this.groupNameToIndex = pattern.namedGroups(); + this.input = input.toString(); this.inputBytes = this.input.getBytes(StandardCharsets.UTF_8); + reset(); } @@ -290,7 +305,7 @@ public boolean lookingAt() { regionStart, matchOptions, matchData, - null + matchContext ); if (result < 1) { if (result == IPcre2.ERROR_NOMATCH) { @@ -329,7 +344,7 @@ public boolean matches() { regionStart, matchOptions, matchData, - null + matchContext ); if (result < 1) { if (result == IPcre2.ERROR_NOMATCH) { @@ -535,9 +550,15 @@ public Matcher usePattern(Pattern newPattern) { if (newPattern == null) { throw new IllegalArgumentException("Pattern cannot be null"); } + this.pattern = 
newPattern; + this.matchContext = new Pcre2MatchContext(pattern.code.api(), null); + this.jitStack = new Pcre2JitStack(pattern.code.api(), JIT_STACK_START_SIZE, JIT_STACK_MAX_SIZE, null); + this.matchContext.assignJitStack(jitStack); this.groupNameToIndex = newPattern.namedGroups(); + reset(); + return this; } @@ -556,7 +577,7 @@ private boolean search(int start) { start, EnumSet.noneOf(Pcre2MatchOption.class), matchData, - null + matchContext ); if (result < 1) { if (result == IPcre2.ERROR_NOMATCH) {