Skip to content

Commit

Permalink
Pool a matcher per thread
Browse files Browse the repository at this point in the history
Matching is a leaf operation, so we can pool a matcher per thread
to avoid the cost of constructing and initializing the entire
matcher for each string.

This patch does eliminate all object creation in typical usage,
reusing all objects and expecting users to pull match data out of
them before using the regex again. However in benchmarks of a
regex-heavy StringScanner-based CSV library (jruby/jruby#7604) the
overhead of fetching and clearing the pooled matcher appears to
hurt performance more than it helps to remove the allocations.
  • Loading branch information
headius committed Oct 10, 2023
1 parent c187bb2 commit 96b5a7f
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 5 deletions.
14 changes: 14 additions & 0 deletions src/org/joni/ByteCodeMachine.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,20 @@ class ByteCodeMachine extends StackMachine {
this.code = regex.code;
}

/**
 * Prepares this machine for reuse against a new input.
 *
 * <p>Delegates to the superclass reset with the same arguments, then clears
 * the interrupt flag and zeroes the per-match cursor fields of this class.
 *
 * @param bytes input buffer, forwarded to the superclass reset
 * @param p     forwarded to the superclass reset
 * @param end   forwarded to the superclass reset
 */
@Override
public void reset(byte[] bytes, int p, int end) {
    super.reset(bytes, p, end);

    // A pending interrupt belongs to the previous use of this instance.
    interrupted = false;

    bestLen = 0;
    s = 0;
    range = 0;
    sprev = 0;
    sstart = 0;
    sbegin = 0;
    pkeep = 0;
    ip = 0;
}

@Override
public void interrupt() {
interrupted = true;
Expand Down
22 changes: 18 additions & 4 deletions src/org/joni/Matcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@ public abstract class Matcher extends IntHolder {
protected final Regex regex;
protected final Encoding enc;

protected final byte[]bytes;
protected final int str;
protected final int end;
protected byte[]bytes;
protected int str;
protected int end;

protected int msaStart;
protected int msaOptions;
protected final Region msaRegion;
protected Region msaRegion;
protected int msaBestLen;
protected int msaBestS;
protected int msaGpos;
Expand All @@ -58,6 +58,20 @@ public abstract class Matcher extends IntHolder {
this.msaRegion = region;
}

/**
 * Points this matcher at a new input and discards state left by an earlier use.
 *
 * <p>The capture region, when one is present, is cleared; every {@code msa*}
 * bookkeeping field is set back to zero.
 *
 * @param bytes new input buffer
 * @param p     value stored in {@code str}
 * @param end   value stored in {@code end}
 */
public void reset(byte[] bytes, int p, int end) {
    final Region region = this.msaRegion;
    if (region != null) {
        region.clear();
    }

    this.bytes = bytes;
    str = p;
    this.end = end;

    msaStart = msaOptions = 0;
    msaBestLen = msaBestS = msaGpos = 0;
    msaBegin = 0;
    msaEnd = 0;
}

// main matching method
protected abstract int matchAt(int range, int sstart, int sprev, boolean interrupt) throws InterruptedException;

Expand Down
14 changes: 13 additions & 1 deletion src/org/joni/Regex.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
import static org.joni.Option.isDontCaptureGroup;

import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.jcodings.CaseFoldCodeItem;
import org.jcodings.Encoding;
Expand Down Expand Up @@ -182,9 +184,19 @@ public Matcher matcherNoRegion(byte[]bytes) {
}

/**
 * Returns the calling thread's pooled matcher, targeted at the given input.
 *
 * <p>The first call on a thread creates the matcher (with a region only when
 * the pattern has capture groups, i.e. {@code numMem != 0}) and caches it;
 * later calls on that thread reset and return the same instance. Because the
 * instance is reused, match data obtained from an earlier call on the same
 * thread must be read before this method is called again.
 *
 * @param bytes input buffer
 * @param p     passed to the matcher together with {@code end}
 * @param end   passed to the matcher together with {@code p}
 * @return the matcher cached for the current thread
 */
public Matcher matcher(byte[] bytes, int p, int end) {
    Matcher matcher = matchers.get();

    if (matcher == null) {
        Region region = numMem == 0 ? null : Region.newRegion(numMem + 1);
        matcher = factory.create(this, region, bytes, p, end);
        matchers.set(matcher);
    } else {
        matcher.reset(bytes, p, end);
    }

    return matcher;
}

/** Holds one reusable matcher per thread; the holder itself is never replaced. */
private final ThreadLocal<Matcher> matchers = new ThreadLocal<>();

/**
 * Creates a new matcher that has no capture region.
 *
 * @param bytes input buffer
 * @param p     passed to the factory together with {@code end}
 * @param end   passed to the factory together with {@code p}
 * @return a newly created matcher built with a {@code null} region
 */
public Matcher matcherNoRegion(byte[] bytes, int p, int end) {
    final Matcher regionless = factory.create(this, null, bytes, p, end);
    return regionless;
}
Expand Down
10 changes: 10 additions & 0 deletions src/org/joni/StackEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ class StackEntry {
int type;
private int E1, E2, E3, E4;

/** Zeroes the entry type and all four payload slots so the entry can be reused. */
void reset() {
    type = 0;
    E1 = 0;
    E2 = 0;
    E3 = 0;
    E4 = 0;
}

// first union member
/* byte code position */
void setStatePCode(int pcode) {
Expand Down Expand Up @@ -188,4 +192,10 @@ void setStateCheck(int check) {
/** Returns the state-check value held in the {@code E5} slot. */
int getStateCheck() {
    return this.E5;
}

/** Clears the inherited slots via the superclass reset, then zeroes {@code E5}. */
@Override
void reset() {
    super.reset();

    E5 = 0;
}
}
19 changes: 19 additions & 0 deletions src/org/joni/StackMachine.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@ protected StackMachine(Regex regex, Region region, byte[]bytes, int p , int end)
repeatStk = n > 0 ? new int[n] : null;
}

/**
 * Prepares this stack machine for reuse against a new input.
 *
 * <p>After the superclass reset, the backtrack stack entries are cleared when
 * the regex requires a stack, the repeat stack is zeroed, the stack pointer
 * is rewound and the state-check buffer is dropped.
 *
 * @param bytes input buffer, forwarded to the superclass reset
 * @param p     forwarded to the superclass reset
 * @param end   forwarded to the superclass reset
 */
@Override
public void reset(byte[] bytes, int p, int end) {
    super.reset(bytes, p, end);

    if (regex.requireStack) {
        resetStack(stack);
    }
    if (repeatStk != null) {
        // Restore the all-zero state of a freshly allocated array. The
        // two-argument Arrays.fill takes the fill VALUE as its second
        // argument, so passing the array length would write that length
        // into every slot instead of clearing it.
        Arrays.fill(repeatStk, 0);
    }
    stk = 0;
    stateCheckBuff = null;
    stateCheckBuffSize = 0;
}

protected final void stackInit() {
if (stack != null) pushEnsured(ALT, regex.codeLength - 1); /* bottom stack */
if (repeatStk != null) {
Expand All @@ -71,6 +83,13 @@ private static StackEntry[] allocateStack() {
return stack;
}

/**
 * Resets every entry already allocated in the given stack; empty
 * ({@code null}) slots are left untouched.
 */
private static void resetStack(StackEntry[] stack) {
    for (StackEntry slot : stack) {
        if (slot == null) {
            continue;
        }
        slot.reset();
    }
}

private void doubleStack() {
StackEntry[] newStack = new StackEntry[stack.length << 1];
System.arraycopy(stack, 0, newStack, 0, stack.length);
Expand Down

0 comments on commit 96b5a7f

Please sign in to comment.