Skip to content

Commit

Permalink
refactor: internal low level allocation stuff, removing a branch when allocating (fixes #25)
Browse files (browse the repository at this point in the history)
  • Loading branch information
bluurryy committed Aug 22, 2024
1 parent 5469419 commit e9b911b
Show file tree
Hide file tree
Showing 45 changed files with 380 additions and 404 deletions.
5 changes: 2 additions & 3 deletions crates/inspect-asm/out/x86-64/alloc_12_u32/down.asm
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,11 @@ inspect_asm::alloc_12_u32::down:
mov rax, qword ptr [rcx]
mov rdx, rax
sub rdx, qword ptr [rcx + 8]
cmp rdx, 48
jb .LBB0_1
cmp rdx, 47
jbe .LBB0_1
and rax, -4
add rax, -48
mov qword ptr [rcx], rax
je .LBB0_1
.LBB0_0:
movups xmm0, xmmword ptr [rsi]
movups xmm1, xmmword ptr [rsi + 16]
Expand Down
5 changes: 2 additions & 3 deletions crates/inspect-asm/out/x86-64/alloc_12_u32/down_a.asm
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,10 @@ inspect_asm::alloc_12_u32::down_a:
mov rax, qword ptr [rcx]
mov rdx, rax
sub rdx, qword ptr [rcx + 8]
cmp rdx, 48
jb .LBB0_1
cmp rdx, 47
jbe .LBB0_1
add rax, -48
mov qword ptr [rcx], rax
je .LBB0_1
.LBB0_0:
movups xmm0, xmmword ptr [rsi]
movups xmm1, xmmword ptr [rsi + 16]
Expand Down
6 changes: 2 additions & 4 deletions crates/inspect-asm/out/x86-64/alloc_12_u32/up.asm
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@ inspect_asm::alloc_12_u32::up:
add rax, 3
and rax, -4
sub rdx, rax
cmp rdx, 48
jb .LBB0_1
cmp rdx, 47
jbe .LBB0_1
lea rdx, [rax + 48]
mov qword ptr [rcx], rdx
test rax, rax
je .LBB0_1
.LBB0_0:
movups xmm0, xmmword ptr [rsi]
movups xmm1, xmmword ptr [rsi + 16]
Expand Down
4 changes: 2 additions & 2 deletions crates/inspect-asm/out/x86-64/alloc_12_u32/up_a.asm
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ inspect_asm::alloc_12_u32::up_a:
mov rax, qword ptr [rcx]
mov rdx, qword ptr [rcx + 8]
sub rdx, rax
cmp rdx, 48
jb .LBB0_1
cmp rdx, 47
jbe .LBB0_1
lea rdx, [rax + 48]
mov qword ptr [rcx], rdx
.LBB0_0:
Expand Down
2 changes: 0 additions & 2 deletions crates/inspect-asm/out/x86-64/alloc_big/down.asm
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ inspect_asm::alloc_big::down:
cmp rdi, qword ptr [rcx + 8]
jb .LBB0_0
mov qword ptr [rcx], rdi
test rdi, rdi
je .LBB0_0
mov edx, 512
pop rbx
jmp qword ptr [rip + memcpy@GOTPCREL]
Expand Down
2 changes: 0 additions & 2 deletions crates/inspect-asm/out/x86-64/alloc_big/down_a.asm
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ inspect_asm::alloc_big::down_a:
cmp rdi, qword ptr [rcx + 8]
jb .LBB0_0
mov qword ptr [rcx], rdi
test rdi, rdi
je .LBB0_0
mov edx, 512
pop rbx
jmp qword ptr [rip + memcpy@GOTPCREL]
Expand Down
3 changes: 1 addition & 2 deletions crates/inspect-asm/out/x86-64/alloc_big/up.asm
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@ inspect_asm::alloc_big::up:
cmovae rdx, r8
cmp rdx, qword ptr [rcx + 8]
ja .LBB0_0
mov qword ptr [rcx], rdx
add rdi, 512
je .LBB0_0
mov qword ptr [rcx], rdx
mov edx, 512
pop rbx
jmp qword ptr [rip + memcpy@GOTPCREL]
Expand Down
3 changes: 1 addition & 2 deletions crates/inspect-asm/out/x86-64/alloc_big/up_a.asm
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@ inspect_asm::alloc_big::up_a:
cmovae rdx, r8
cmp rdx, qword ptr [rcx + 8]
ja .LBB0_0
mov qword ptr [rcx], rdx
add rdi, 512
je .LBB0_0
mov qword ptr [rcx], rdx
mov edx, 512
pop rbx
jmp qword ptr [rip + memcpy@GOTPCREL]
Expand Down
1 change: 0 additions & 1 deletion crates/inspect-asm/out/x86-64/alloc_iter_u32/down.asm
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ inspect_asm::alloc_iter_u32::down:
sub rax, r12
and rax, -4
mov qword ptr [rcx], rax
je .LBB0_10
.LBB0_0:
mov qword ptr [rsp + 8], rax
mov qword ptr [rsp + 16], 0
Expand Down
45 changes: 22 additions & 23 deletions crates/inspect-asm/out/x86-64/alloc_iter_u32/down_a.asm
Original file line number Diff line number Diff line change
Expand Up @@ -6,52 +6,51 @@ inspect_asm::alloc_iter_u32::down_a:
push r12
push rbx
sub rsp, 40
mov rbx, rdi
mov r14, rdi
test rdx, rdx
je .LBB0_5
mov rax, rdx
shr rax, 61
jne .LBB0_11
mov r15, rsi
lea r12, [4*rdx]
mov rcx, qword ptr [rbx]
mov rcx, qword ptr [r14]
mov rax, qword ptr [rcx]
mov rsi, rax
sub rsi, qword ptr [rcx + 8]
cmp r12, rsi
ja .LBB0_10
sub rax, r12
mov qword ptr [rcx], rax
je .LBB0_10
.LBB0_0:
mov qword ptr [rsp + 8], rax
mov qword ptr [rsp + 16], 0
mov qword ptr [rsp + 24], rdx
mov qword ptr [rsp + 32], rbx
mov qword ptr [rsp + 32], r14
xor r13d, r13d
lea rbx, [rsp + 8]
xor r14d, r14d
lea r14, [rsp + 8]
xor ebx, ebx
jmp .LBB0_2
.LBB0_1:
mov dword ptr [rax + 4*r14], ebp
inc r14
mov qword ptr [rsp + 16], r14
mov dword ptr [rax + 4*rbx], ebp
inc rbx
mov qword ptr [rsp + 16], rbx
add r13, 4
cmp r12, r13
je .LBB0_3
.LBB0_2:
mov ebp, dword ptr [r15 + r13]
cmp qword ptr [rsp + 24], r14
cmp qword ptr [rsp + 24], rbx
jne .LBB0_1
mov rdi, rbx
mov rdi, r14
call bump_scope::bump_vec::BumpVec<T,A,_,_,_>::generic_grow_cold
mov rax, qword ptr [rsp + 8]
mov r14, qword ptr [rsp + 16]
mov rbx, qword ptr [rsp + 16]
jmp .LBB0_1
.LBB0_3:
mov rsi, qword ptr [rsp + 8]
mov rbx, qword ptr [rsp + 32]
mov rax, qword ptr [rbx]
mov r14, qword ptr [rsp + 32]
mov rax, qword ptr [r14]
cmp rsi, qword ptr [rax]
je .LBB0_6
.LBB0_4:
Expand All @@ -60,12 +59,12 @@ inspect_asm::alloc_iter_u32::down_a:
.LBB0_5:
mov qword ptr [rsp + 24], rdx
mov esi, 4
xor r14d, r14d
mov rax, qword ptr [rbx]
xor ebx, ebx
mov rax, qword ptr [r14]
cmp rsi, qword ptr [rax]
jne .LBB0_4
.LBB0_6:
lea rdx, [4*r14]
lea rdx, [4*rbx]
mov rax, qword ptr [rsp + 24]
lea rax, [rsi + 4*rax]
xor edi, edi
Expand All @@ -81,12 +80,12 @@ inspect_asm::alloc_iter_u32::down_a:
.LBB0_7:
call qword ptr [rip + memcpy@GOTPCREL]
.LBB0_8:
mov rcx, qword ptr [rbx]
mov rcx, qword ptr [r14]
mov rax, r15
mov qword ptr [rcx], r15
mov qword ptr [rsp + 24], r14
mov qword ptr [rsp + 24], rbx
.LBB0_9:
mov rdx, r14
mov rdx, rbx
add rsp, 40
pop rbx
pop r12
Expand All @@ -96,11 +95,11 @@ inspect_asm::alloc_iter_u32::down_a:
pop rbp
ret
.LBB0_10:
mov rdi, rbx
mov rdi, r14
mov rsi, rdx
mov r14, rdx
mov rbx, rdx
call bump_scope::bump_scope::BumpScope<A,_,_,_>::do_alloc_slice_in_another_chunk
mov rdx, r14
mov rdx, rbx
jmp .LBB0_0
.LBB0_11:
call qword ptr [rip + bump_scope::private::capacity_overflow@GOTPCREL]
64 changes: 31 additions & 33 deletions crates/inspect-asm/out/x86-64/alloc_iter_u32/exact_down.asm
Original file line number Diff line number Diff line change
@@ -1,55 +1,53 @@
inspect_asm::alloc_iter_u32::exact_down:
push r15
push r14
push rbx
push rax
mov rax, rdx
shr rax, 61
jne .LBB0_7
lea r8, [4*rdx]
lea rbx, [4*rdx]
mov rcx, qword ptr [rdi]
mov rax, qword ptr [rcx]
mov r9, rax
sub r9, qword ptr [rcx + 8]
cmp r8, r9
mov r8, rax
sub r8, qword ptr [rcx + 8]
cmp rbx, r8
ja .LBB0_5
sub rax, r8
sub rax, rbx
and rax, -4
mov qword ptr [rcx], rax
je .LBB0_5
test rdx, rdx
je .LBB0_4
.LBB0_0:
lea rcx, [rdx - 1]
movabs r8, 4611686018427387903
and r8, rcx
cmp rdx, r8
cmovb r8, rdx
add rbx, -4
shr rbx, 2
cmp rdx, rbx
cmovb rbx, rdx
mov rcx, rsi
mov rdi, rax
cmp r8, 8
cmp rbx, 8
jb .LBB0_2
mov r9, rax
sub r9, rsi
mov r8, rax
sub r8, rsi
mov rcx, rsi
mov rdi, rax
cmp r9, 31
cmp r8, 31
jbe .LBB0_2
inc r8
mov ecx, r8d
inc rbx
mov ecx, ebx
and ecx, 7
mov edi, 8
cmovne rdi, rcx
sub r8, rdi
lea rcx, [rsi + 4*r8]
lea rdi, [rax + 4*r8]
xor r9d, r9d
sub rbx, rdi
lea rcx, [rsi + 4*rbx]
lea rdi, [rax + 4*rbx]
xor r8d, r8d
.LBB0_1:
movups xmm0, xmmword ptr [rsi + 4*r9]
movups xmm1, xmmword ptr [rsi + 4*r9 + 16]
movups xmmword ptr [rax + 4*r9], xmm0
movups xmmword ptr [rax + 4*r9 + 16], xmm1
add r9, 8
cmp r8, r9
movups xmm0, xmmword ptr [rsi + 4*r8]
movups xmm1, xmmword ptr [rsi + 4*r8 + 16]
movups xmmword ptr [rax + 4*r8], xmm0
movups xmmword ptr [rax + 4*r8 + 16], xmm1
add r8, 8
cmp rbx, r8
jne .LBB0_1
.LBB0_2:
lea rsi, [rsi + 4*rdx]
Expand All @@ -64,17 +62,17 @@ inspect_asm::alloc_iter_u32::exact_down:
cmp rdi, r8
jne .LBB0_3
.LBB0_4:
add rsp, 8
pop rbx
pop r14
pop r15
ret
.LBB0_5:
mov rbx, rsi
mov r14, rsi
mov rsi, rdx
mov r14, rdx
mov r15, rdx
call bump_scope::bump_scope::BumpScope<A,_,_,_>::do_alloc_slice_in_another_chunk
mov rsi, rbx
mov rdx, r14
mov rsi, r14
mov rdx, r15
test rdx, rdx
jne .LBB0_0
jmp .LBB0_4
Expand Down
Loading

0 comments on commit e9b911b

Please sign in to comment.