From ca77a1993dda600a5c75d789ba90ba35caa375c4 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 21 Aug 2024 22:24:20 -0500 Subject: [PATCH] Avoid creating intermediate symbols for attr assigns Attribute assignment with a user-provided symbol creates a new ByteList with '=' appended, and then proceeds to look up any existing symbol with that new name. If the name exists, the intermediate ByteList is a wasted allocation. This patch provides another way to look up a symbol with a single- character suffix without creating a new ByteList to hold those bytes. In a quick and dirty profile, use of the previous commit's ByteList.copyAndAppend almost entirely disappears, and overall byte[] and ByteList allocations are largely limited to creating the original identifier name. This technique could be extended to support looking up a symbol based on a range of bytes, avoiding any copying when the bytes are already represented by an existing symbol. --- core/src/main/java/org/jruby/RubySymbol.java | 57 +++++++++++++++++++ .../java/org/jruby/parser/RubyParserBase.java | 29 ++++++++-- 2 files changed, 82 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/jruby/RubySymbol.java b/core/src/main/java/org/jruby/RubySymbol.java index 6ec5001d656..bd32fd4513c 100644 --- a/core/src/main/java/org/jruby/RubySymbol.java +++ b/core/src/main/java/org/jruby/RubySymbol.java @@ -373,6 +373,10 @@ public static RubySymbol newIDSymbol(Ruby runtime, ByteList bytes) { return newHardSymbol(runtime, bytes); } + public static RubySymbol newSuffixedIDSymbol(Ruby runtime, ByteList bytes, int suffix) { + return runtime.getSymbolTable().getSuffixedSymbol(bytes, suffix, true); + } + /** * Generic identifier symbol creation (or retrieval) method that invokes a handler before storing new symbols. * @@ -1071,6 +1075,23 @@ public RubySymbol getSymbol(ByteList bytes, boolean hard) { return symbol; } + public RubySymbol getSuffixedSymbol(ByteList bytes, int extra, boolean hard) { + int hash = javaStringHashCode(bytes, extra); + + RubySymbol symbol = findSuffixedSymbol(bytes, extra, hash, hard); + + if (symbol == null) { + bytes = bytes.copyAndAppend(extra); + symbol = createSymbol( + RubyEncoding.decodeRaw(bytes), + bytes, + hash, + hard); + } + + return symbol; + } + /** * Get or retrieve an existing symbol from the table, invoking the given handler before return. * In the case of a new symbol, the handler will be invoked before the symbol is registered, so it can be @@ -1115,6 +1136,20 @@ private RubySymbol findSymbol(ByteList bytes, int hash, boolean hard) { return symbol; } + private RubySymbol findSuffixedSymbol(ByteList bytes, int suffix, int hash, boolean hard) { + RubySymbol symbol = null; + + for (SymbolEntry e = getEntryFromTable(symbolTable, hash); e != null; e = e.next) { + if (isSuffixedSymbolMatch(bytes, suffix, hash, e)) { + if (hard) e.setHardReference(); + symbol = e.symbol.get(); + break; + } + } + + return symbol; + } + public RubySymbol fastGetSymbol(String internedName) { return fastGetSymbol(internedName, false); } @@ -1150,6 +1185,12 @@ private static boolean isSymbolMatch(ByteList bytes, int hash, SymbolEntry entry return hash == entry.hash && bytes.equals(entry.bytes); } + private static boolean isSuffixedSymbolMatch(ByteList bytes, int suffix, int hash, SymbolEntry entry) { + ByteList entryBytes = entry.bytes; + int entrySize = entryBytes.realSize(); + return hash == entry.hash && entrySize == bytes.realSize() + 1 && entryBytes.startsWith(bytes) && entryBytes.get(entrySize - 1) == suffix; + } + private RubySymbol createSymbol(final String name, final ByteList value, final int hash, boolean hard) { ReentrantLock lock = tableLock; @@ -1430,6 +1471,22 @@ public static int javaStringHashCode(ByteList iso8859) { return h; } + /** + * Like javaStringHashCode but considering one additional byte. + * + * Useful for hashing a ByteList plus one more character without creating a new ByteList. + * + * @param iso8859 the base ByteList + * @param extra the extra byte + * @return a hash of the bytes of the base ByteList plus the extra byte + */ + public static int javaStringHashCode(ByteList iso8859, int extra) { + int h = javaStringHashCode(iso8859); + int v = extra & 0xFF; + h = 31 * h + v; + return h; + } + @Override public Class getJavaClass() { return String.class; diff --git a/core/src/main/java/org/jruby/parser/RubyParserBase.java b/core/src/main/java/org/jruby/parser/RubyParserBase.java index eef9a529f87..ab033865e04 100644 --- a/core/src/main/java/org/jruby/parser/RubyParserBase.java +++ b/core/src/main/java/org/jruby/parser/RubyParserBase.java @@ -570,7 +570,7 @@ public Node match_op(Node firstNode, Node secondNode) { public Node aryset(Node receiver, Node index) { value_expr(receiver); - return new_attrassign(receiver.getLine(), receiver, CommonByteLists.ASET_METHOD, index, false); + return new_arrayattrassign(receiver.getLine(), receiver, index, false); } /** @@ -585,7 +585,7 @@ public Node attrset(Node receiver, ByteList name) { } public Node attrset(Node receiver, ByteList callType, ByteList name) { - return new_attrassign(receiver.getLine(), receiver, name.copyAndAppend('='), null, isLazy(callType)); + return new_attrassign(receiver.getLine(), receiver, name, null, isLazy(callType)); } public void backrefAssignError(Node node) { @@ -1257,6 +1257,16 @@ public RubySymbol symbolID(ByteList identifierValue) { return RubySymbol.newIDSymbol(getRuntime(), identifierValue); } + public RubySymbol attrsetSymbolID(ByteList baseIdentifier) { + // FIXME: We walk this during identifier construction so we should calculate CR without having to walk twice. + if (RubyString.scanForCodeRange(baseIdentifier) == StringSupport.CR_BROKEN) { + Ruby runtime = getRuntime(); + throw runtime.newSyntaxError(str(runtime, "invalid symbol in encoding " + lexer.getEncoding() + " :\"", inspectIdentifierByteList(runtime, baseIdentifier.copyAndAppend('=')), "\"")); + } + + return RubySymbol.newSuffixedIDSymbol(getRuntime(), baseIdentifier, '='); + } + public boolean isLazy(String callType) { return "&.".equals(callType); } @@ -1265,7 +1275,18 @@ public boolean isLazy(ByteList callType) { return callType == AMPERSAND_DOT; } - public Node new_attrassign(int line, Node receiver, ByteList name, Node argsNode, boolean isLazy) { + public Node new_attrassign(int line, Node receiver, ByteList baseName, Node argsNode, boolean isLazy) { + // We extract BlockPass from tree and insert it as a block node value (MRI wraps it around the args) + Node blockNode = null; + if (argsNode instanceof BlockPassNode) { + blockNode = argsNode; // It is weird to leave this as-is but we need to know it vs iternode vs weird ast bug. + argsNode = ((BlockPassNode) argsNode).getArgsNode(); + } + + return new AttrAssignNode(line, receiver, attrsetSymbolID(baseName), argsNode, blockNode, isLazy); + } + + public Node new_arrayattrassign(int line, Node receiver, Node argsNode, boolean isLazy) { // We extract BlockPass from tree and insert it as a block node value (MRI wraps it around the args) Node blockNode = null; if (argsNode instanceof BlockPassNode) { @@ -1273,7 +1294,7 @@ public Node new_attrassign(int line, Node receiver, ByteList name, Node argsNode argsNode = ((BlockPassNode) argsNode).getArgsNode(); } - return new AttrAssignNode(line, receiver, symbolID(name), argsNode, blockNode, isLazy); + return new AttrAssignNode(line, receiver, symbolID(CommonByteLists.ASET_METHOD), argsNode, blockNode, isLazy); } public Node new_call(Node receiver, ByteList callType, ByteList name, Node argsNode, Node iter) {