Skip to content

Commit

Permalink
Avoid creating intermediate symbols for attr assigns
Browse files Browse the repository at this point in the history
Attribute assignment with a user-provided symbol creates a new
ByteList with '=' appended, and then proceeds to look up any
existing symbol with that new name. If the name exists, the
intermediate ByteList is a wasted allocation.

This patch provides another way to look up a symbol with a single-
character suffix without creating a new ByteList to hold those
bytes. In a quick and dirty profile, use of the previous commit's
ByteList.copyAndAppend almost entirely disappears, and overall
byte[] and ByteList allocations are largely limited to creating
the original identifier name.

This technique could be extended to support looking up a symbol
based on a range of bytes, avoiding any copying when the bytes are
already represented by an existing symbol.
  • Loading branch information
headius committed Aug 22, 2024
1 parent b2e2172 commit ca77a19
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 4 deletions.
57 changes: 57 additions & 0 deletions core/src/main/java/org/jruby/RubySymbol.java
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,10 @@ public static RubySymbol newIDSymbol(Ruby runtime, ByteList bytes) {
return newHardSymbol(runtime, bytes);
}

public static RubySymbol newSuffixedIDSymbol(Ruby runtime, ByteList bytes, int suffix) {
return runtime.getSymbolTable().getSuffixedSymbol(bytes, suffix, true);
}

/**
* Generic identifier symbol creation (or retrieval) method that invokes a handler before storing new symbols.
*
Expand Down Expand Up @@ -1071,6 +1075,23 @@ public RubySymbol getSymbol(ByteList bytes, boolean hard) {
return symbol;
}

public RubySymbol getSuffixedSymbol(ByteList bytes, int extra, boolean hard) {
int hash = javaStringHashCode(bytes, extra);

RubySymbol symbol = findSuffixedSymbol(bytes, extra, hash, hard);

if (symbol == null) {
bytes = bytes.copyAndAppend(extra);
symbol = createSymbol(
RubyEncoding.decodeRaw(bytes),
bytes,
hash,
hard);
}

return symbol;
}

/**
* Get or retrieve an existing symbol from the table, invoking the given handler before return.
* In the case of a new symbol, the handler will be invoked before the symbol is registered, so it can be
Expand Down Expand Up @@ -1115,6 +1136,20 @@ private RubySymbol findSymbol(ByteList bytes, int hash, boolean hard) {
return symbol;
}

private RubySymbol findSuffixedSymbol(ByteList bytes, int suffix, int hash, boolean hard) {
RubySymbol symbol = null;

for (SymbolEntry e = getEntryFromTable(symbolTable, hash); e != null; e = e.next) {
if (isSuffixedSymbolMatch(bytes, suffix, hash, e)) {
if (hard) e.setHardReference();
symbol = e.symbol.get();
break;
}
}

return symbol;
}

public RubySymbol fastGetSymbol(String internedName) {
return fastGetSymbol(internedName, false);
}
Expand Down Expand Up @@ -1150,6 +1185,12 @@ private static boolean isSymbolMatch(ByteList bytes, int hash, SymbolEntry entry
return hash == entry.hash && bytes.equals(entry.bytes);
}

private static boolean isSuffixedSymbolMatch(ByteList bytes, int suffix, int hash, SymbolEntry entry) {
ByteList entryBytes = entry.bytes;
int entrySize = entryBytes.realSize();
return hash == entry.hash && entrySize == bytes.realSize() + 1 && entryBytes.startsWith(bytes) && entryBytes.get(entrySize - 1) == suffix;
}

private RubySymbol createSymbol(final String name, final ByteList value, final int hash, boolean hard) {
ReentrantLock lock = tableLock;

Expand Down Expand Up @@ -1430,6 +1471,22 @@ public static int javaStringHashCode(ByteList iso8859) {
return h;
}

/**
* Like javaStringHashCode but considering one additional byte.
*
* Useful for hashing a ByteList plus one more character without creating a new ByteList.
*
* @param iso8859 the base ByteList
* @param extra the extra byte
* @return a hash of the bytes of the base ByteList plus the extra byte
*/
public static int javaStringHashCode(ByteList iso8859, int extra) {
int h = javaStringHashCode(iso8859);
int v = extra & 0xFF;
h = 31 * h + v;
return h;
}

@Override
public Class getJavaClass() {
return String.class;
Expand Down
29 changes: 25 additions & 4 deletions core/src/main/java/org/jruby/parser/RubyParserBase.java
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,7 @@ public Node match_op(Node firstNode, Node secondNode) {
public Node aryset(Node receiver, Node index) {
value_expr(receiver);

return new_attrassign(receiver.getLine(), receiver, CommonByteLists.ASET_METHOD, index, false);
return new_arrayattrassign(receiver.getLine(), receiver, index, false);
}

/**
Expand All @@ -585,7 +585,7 @@ public Node attrset(Node receiver, ByteList name) {
}

public Node attrset(Node receiver, ByteList callType, ByteList name) {
return new_attrassign(receiver.getLine(), receiver, name.copyAndAppend('='), null, isLazy(callType));
return new_attrassign(receiver.getLine(), receiver, name, null, isLazy(callType));
}

public void backrefAssignError(Node node) {
Expand Down Expand Up @@ -1257,6 +1257,16 @@ public RubySymbol symbolID(ByteList identifierValue) {
return RubySymbol.newIDSymbol(getRuntime(), identifierValue);
}

public RubySymbol attrsetSymbolID(ByteList baseIdentifier) {
// FIXME: We walk this during identifier construction so we should calculate CR without having to walk twice.
if (RubyString.scanForCodeRange(baseIdentifier) == StringSupport.CR_BROKEN) {
Ruby runtime = getRuntime();
throw runtime.newSyntaxError(str(runtime, "invalid symbol in encoding " + lexer.getEncoding() + " :\"", inspectIdentifierByteList(runtime, baseIdentifier.copyAndAppend('=')), "\""));
}

return RubySymbol.newSuffixedIDSymbol(getRuntime(), baseIdentifier, '=');
}

public boolean isLazy(String callType) {
return "&.".equals(callType);
}
Expand All @@ -1265,15 +1275,26 @@ public boolean isLazy(ByteList callType) {
return callType == AMPERSAND_DOT;
}

public Node new_attrassign(int line, Node receiver, ByteList name, Node argsNode, boolean isLazy) {
public Node new_attrassign(int line, Node receiver, ByteList baseName, Node argsNode, boolean isLazy) {
// We extract BlockPass from tree and insert it as a block node value (MRI wraps it around the args)
Node blockNode = null;
if (argsNode instanceof BlockPassNode) {
blockNode = argsNode; // It is weird to leave this as-is but we need to know it vs iternode vs weird ast bug.
argsNode = ((BlockPassNode) argsNode).getArgsNode();
}

return new AttrAssignNode(line, receiver, attrsetSymbolID(baseName), argsNode, blockNode, isLazy);
}

public Node new_arrayattrassign(int line, Node receiver, Node argsNode, boolean isLazy) {
// We extract BlockPass from tree and insert it as a block node value (MRI wraps it around the args)
Node blockNode = null;
if (argsNode instanceof BlockPassNode) {
blockNode = argsNode; // It is weird to leave this as-is but we need to know it vs iternode vs weird ast bug.
argsNode = ((BlockPassNode) argsNode).getArgsNode();
}

return new AttrAssignNode(line, receiver, symbolID(name), argsNode, blockNode, isLazy);
return new AttrAssignNode(line, receiver, symbolID(CommonByteLists.ASET_METHOD), argsNode, blockNode, isLazy);
}

public Node new_call(Node receiver, ByteList callType, ByteList name, Node argsNode, Node iter) {
Expand Down

0 comments on commit ca77a19

Please sign in to comment.