From 9d94c431cde2e849022f4e4a60f4ca294b932af1 Mon Sep 17 00:00:00 2001 From: sebthom Date: Sun, 8 Sep 2024 23:27:31 +0200 Subject: [PATCH] perf: only call rewritePatternIfRequired() if parsing failed using joni --- .../core/internal/oniguruma/OnigRegExp.java | 43 +++++++++++-------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/oniguruma/OnigRegExp.java b/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/oniguruma/OnigRegExp.java index b04193c92..458389631 100644 --- a/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/oniguruma/OnigRegExp.java +++ b/org.eclipse.tm4e.core/src/main/java/org/eclipse/tm4e/core/internal/oniguruma/OnigRegExp.java @@ -46,13 +46,9 @@ public final class OnigRegExp { */ private static final WarnCallback LOGGER_WARN_CALLBACK = message -> LOGGER.log(Level.WARNING, message); - @Nullable - private OnigString lastSearchString; - + private @Nullable OnigString lastSearchString; private int lastSearchPosition = -1; - - @Nullable - private OnigResult lastSearchResult; + private @Nullable OnigResult lastSearchResult; private final String pattern; private final Regex regex; @@ -70,23 +66,35 @@ public OnigRegExp(final String pattern) { * @throws TMException if parsing fails */ public OnigRegExp(final String pattern, final boolean ignoreCase) { - this.pattern = rewritePatternIfRequired(pattern); - hasGAnchor = this.pattern.contains("\\G"); - final byte[] patternBytes = this.pattern.getBytes(StandardCharsets.UTF_8); + hasGAnchor = pattern.contains("\\G"); + + Regex regex; try { - int options = Option.CAPTURE_GROUP; - if (ignoreCase) - options |= Option.IGNORECASE; - regex = new Regex(patternBytes, 0, patternBytes.length, options, UTF8Encoding.INSTANCE, Syntax.DEFAULT, - LOGGER.isLoggable(Level.WARNING) ? LOGGER_WARN_CALLBACK : WarnCallback.NONE); + regex = parsePattern(pattern, ignoreCase); } catch (final SyntaxException ex) { - throw new TMException("Parsing regex pattern \"" + this.pattern + "\" failed with " + ex, ex); + try { + regex = parsePattern(rewritePatternIfRequired(pattern), ignoreCase); + } catch (final SyntaxException unused) { + throw new TMException("Parsing regex pattern \"" + pattern + "\" failed with " + ex, ex); + } } + + this.pattern = pattern; + this.regex = regex; + } + + private Regex parsePattern(final String pattern, final boolean ignoreCase) throws SyntaxException { + int options = Option.CAPTURE_GROUP; + if (ignoreCase) + options |= Option.IGNORECASE; + final byte[] patternBytes = pattern.getBytes(StandardCharsets.UTF_8); + return new Regex(patternBytes, 0, patternBytes.length, options, UTF8Encoding.INSTANCE, Syntax.DEFAULT, + LOGGER.isLoggable(Level.WARNING) ? LOGGER_WARN_CALLBACK : WarnCallback.NONE); } /** * Rewrites the given pattern to workaround limitations of the joni library which for example does not support - * negative variable-length lookbehinds + * negative variable-length look-behinds * * @see github.com/eclipse/tm4e/issue/677 */ @@ -129,8 +137,7 @@ private String rewritePatternIfRequired(final String pattern) { return lastSearchResult; } - @Nullable - private OnigResult search(final byte[] data, final int startPosition, final int end) { + private @Nullable OnigResult search(final byte[] data, final int startPosition, final int end) { final Matcher matcher = regex.matcher(data); final int status = matcher.search(startPosition, end, Option.DEFAULT); if (status != Matcher.FAILED) {