Skip to content

Commit

Permalink
Fix header < 16 bytes in indexOf intrinsic, by @sviswa7
Browse files Browse the repository at this point in the history
  • Loading branch information
rkennke committed Oct 8, 2024
1 parent 17f8eb5 commit f65ef5d
Showing 1 changed file with 90 additions and 53 deletions.
143 changes: 90 additions & 53 deletions src/hotspot/cpu/x86/c2_stubGenerator_x86_64_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "precompiled.hpp"
#include "macroAssembler_x86.hpp"
#include "stubGenerator_x86_64.hpp"
#include "oops/arrayOop.hpp"
#include "opto/c2_MacroAssembler.hpp"
#include "opto/intrinsicnode.hpp"

Expand Down Expand Up @@ -160,6 +161,9 @@ static void highly_optimized_short_cases(StrIntrinsicNode::ArgEncoding ae, Regis
Register needle_len, XMMRegister XMM0, XMMRegister XMM1,
Register mask, Register tmp, MacroAssembler *_masm);

// Emits code that copies a small (<= 32 byte) haystack into the stack region at
// rsp + COPIED_HAYSTACK_STACK_OFFSET and repoints `haystack` at the copy.
// `isU` selects 2-byte (UTF16) vs 1-byte (Latin1) elements; `tmp` and `xtmp` are scratch.
static void copy_to_stack(Register haystack, Register haystack_len, bool isU, Register tmp,
XMMRegister xtmp, MacroAssembler *_masm);

static void setup_jump_tables(StrIntrinsicNode::ArgEncoding ae, Label &L_error, Label &L_checkRange,
Label &L_fixup, address *big_jump_table, address *small_jump_table,
MacroAssembler *_masm);
Expand Down Expand Up @@ -395,41 +399,20 @@ static void generate_string_indexof_stubs(StubGenerator *stubgen, address *fnptr

// Do "big switch" if haystack size > 32
__ cmpq(haystack_len, 0x20);
__ ja_b(L_bigSwitchTop);
__ ja(L_bigSwitchTop);

// Copy the small (<= 32 byte) haystack to the stack. Allows for vector reads without page fault
// Only done for small haystacks
//
// NOTE: This code assumes that the haystack points to a java array type AND there are
// at least 16 bytes of header preceding the haystack pointer.
// at least 8 bytes of header preceding the haystack pointer.
//
// This means that we're copying up to 15 bytes of the header onto the stack along
// This means that we're copying up to 7 bytes of the header onto the stack along
// with the haystack bytes. After the copy completes, we adjust the haystack pointer
// to the valid haystack bytes on the stack.
{
Label L_moreThan16, L_adjustHaystack;

const Register index = rax;
const Register haystack = rbx;

// Only a single vector load/store of either 16 or 32 bytes
__ cmpq(haystack_len, 0x10);
__ ja_b(L_moreThan16);

__ movq(index, COPIED_HAYSTACK_STACK_OFFSET + 0x10);
__ movdqu(XMM_TMP1, Address(haystack, haystack_len, Address::times_1, -0x10));
__ movdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM_TMP1);
__ jmpb(L_adjustHaystack);

__ bind(L_moreThan16);
__ movq(index, COPIED_HAYSTACK_STACK_OFFSET + 0x20);
__ vmovdqu(XMM_TMP1, Address(haystack, haystack_len, Address::times_1, -0x20));
__ vmovdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM_TMP1);

// Point the haystack at the correct location of the first byte of the "real" haystack on the stack
__ bind(L_adjustHaystack);
__ subq(index, haystack_len);
__ leaq(haystack, Address(rsp, index, Address::times_1));
copy_to_stack(haystack, haystack_len, false, rax, XMM_TMP1, _masm);
}

// Dispatch to handlers for small needle and small haystack
Expand Down Expand Up @@ -1583,34 +1566,8 @@ static void highly_optimized_short_cases(StrIntrinsicNode::ArgEncoding ae, Regis
assert((COPIED_HAYSTACK_STACK_SIZE == 64), "Must be 64!");

// Copy incoming haystack onto stack
{
Label L_adjustHaystack, L_moreThan16;

// Copy haystack to stack (haystack <= 32 bytes)
__ subptr(rsp, COPIED_HAYSTACK_STACK_SIZE);
__ cmpq(haystack_len, isU ? 0x8 : 0x10);
__ ja_b(L_moreThan16);

__ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 0x10);
__ movdqu(XMM0, Address(haystack, haystack_len, isU ? Address::times_2 : Address::times_1, -0x10));
__ movdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM0);
__ jmpb(L_adjustHaystack);

__ bind(L_moreThan16);
__ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 0x20);
__ vmovdqu(XMM0, Address(haystack, haystack_len, isU ? Address::times_2 : Address::times_1, -0x20));
__ vmovdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), XMM0);

__ bind(L_adjustHaystack);
__ subptr(tmp, haystack_len);

if (isU) {
// For UTF-16, lengths are half
__ subptr(tmp, haystack_len);
}
// Point the haystack to the stack
__ leaq(haystack, Address(rsp, tmp, Address::times_1));
}
__ subptr(rsp, COPIED_HAYSTACK_STACK_SIZE);
copy_to_stack(haystack, haystack_len, isU, tmp, XMM0, _masm);

// Creates a mask of (n - k + 1) ones. This prevents recognizing any false-positives
// past the end of the valid haystack.
Expand Down Expand Up @@ -1672,6 +1629,86 @@ static void highly_optimized_short_cases(StrIntrinsicNode::ArgEncoding ae, Regis
__ jmpb(L_out);
}



// Emit code that copies a small (<= 32 byte) haystack onto the stack. Allows for vector
// reads of the haystack without page fault. Only done for small haystacks.
//
// NOTE: This code assumes that the haystack points to a java array type AND there are
// at least 8 bytes of header preceding the haystack pointer.
// Every load below ends exactly at the last haystack byte and is 8, 16, 24 or 32 bytes wide,
// so up to 7 bytes of the header are copied onto the stack along with the haystack bytes
// (a zero-length haystack would make the 8-byte load cover 8 header bytes, which the
// assert below still permits). After the copy completes, the haystack register is adjusted
// to point to the valid haystack bytes on the stack.
//
// The haystack array elements are copied to the stack region
// [rsp + COPIED_HAYSTACK_STACK_OFFSET, rsp + COPIED_HAYSTACK_STACK_OFFSET + 63]
// with the following conditions:
//   - It may copy up to 7 bytes that precede the array
//   - It doesn't read beyond the end of the array
//   - There are at least 31 bytes of stack region beyond the end of the array
// This function does not adjust rsp itself; it only stores relative to it.
//
// Inputs:
//   haystack     - Address of haystack
//   haystack_len - Number of elements in haystack
//   isU          - true if each element is UTF16 (2 bytes), false if Latin1 (1 byte)
//   tmp, xtmp    - Scratch registers (overwritten)
//   _masm        - MacroAssembler to emit through (presumably what the `__` shorthand uses)
// Output:
//   haystack     - Address of copied string on stack

static void copy_to_stack(Register haystack, Register haystack_len, bool isU,
Register tmp, XMMRegister xtmp, MacroAssembler *_masm) {
Label L_moreThan8, L_moreThan16, L_moreThan24, L_adjustHaystack;

// Every load below starts (copy size - byte length) bytes before the array body, so the
// array header must be large enough to absorb that over-read.
assert(arrayOopDesc::base_offset_in_bytes(isU ? T_CHAR : T_BYTE) >= 8,
"Needs at least 8 bytes preceding the array body");

// Copy haystack to stack (haystack <= 32 bytes)
// haystack_len counts elements, so the byte thresholds (8/16/24) are divided by the
// element size before comparing, and the same element size scales the load addresses.
int scale = isU ? 2 : 1; // bytes per char
Address::ScaleFactor addrScale = isU ? Address::times_2 : Address::times_1;

__ cmpq(haystack_len, 16/scale);
__ ja_b(L_moreThan16);

__ cmpq(haystack_len, 8/scale);
__ ja_b(L_moreThan8);
// haystack length <= 8 bytes, copy 8 bytes up to haystack end reading at most 7 bytes into the header
// In every case tmp is set to the stack offset just past the copied bytes.
__ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 8);
__ movq(xtmp, Address(haystack, haystack_len, addrScale, -8));
__ movq(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), xtmp);
__ jmpb(L_adjustHaystack);

__ bind(L_moreThan8);
// haystack length > 8 and <= 16 bytes, copy 16 bytes up to haystack end reading at most 7 bytes into the header
__ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 16);
__ movdqu(xtmp, Address(haystack, haystack_len, addrScale, -16));
__ movdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), xtmp);
__ jmpb(L_adjustHaystack);

__ bind(L_moreThan16);
__ cmpq(haystack_len, 24/scale);
__ ja_b(L_moreThan24);
// haystack length > 16 and <= 24 bytes, copy 24 bytes up to haystack end reading at most 7 bytes into the header
// Done as a 16-byte copy of the leading part followed by an 8-byte copy of the final 8 bytes.
__ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 24);
__ movdqu(xtmp, Address(haystack, haystack_len, addrScale, -24));
__ movdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), xtmp);
__ movq(xtmp, Address(haystack, haystack_len, addrScale, -8));
__ movq(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET + 16), xtmp);
__ jmpb(L_adjustHaystack);

__ bind(L_moreThan24);
// haystack length > 24 and <= 32 bytes, copy 32 bytes up to haystack end reading at most 7 bytes into the header
__ movq(tmp, COPIED_HAYSTACK_STACK_OFFSET + 32);
__ vmovdqu(xtmp, Address(haystack, haystack_len, addrScale, -32));
__ vmovdqu(Address(rsp, COPIED_HAYSTACK_STACK_OFFSET), xtmp);

__ bind(L_adjustHaystack);
// tmp = (offset just past the copy) - (haystack length in bytes)
//     = stack offset of the first real haystack byte.
__ subptr(tmp, haystack_len);

if (isU) {
// For UTF16, haystack_len is in 2-byte elements, so subtract it a second time
__ subptr(tmp, haystack_len);
}

// Point the haystack to the stack
__ leaq(haystack, Address(rsp, tmp, Address::times_1));
}

////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////
Expand Down

0 comments on commit f65ef5d

Please sign in to comment.