From e9b911ba9b9e7421f2be69e340e25e862874cbb7 Mon Sep 17 00:00:00 2001 From: bluurryy <164359728+bluurryy@users.noreply.github.com> Date: Fri, 23 Aug 2024 01:26:36 +0200 Subject: [PATCH] refactor: internal low level allocation stuff, removing a branch when allocating (fixes #25) --- .../out/x86-64/alloc_12_u32/down.asm | 5 +- .../out/x86-64/alloc_12_u32/down_a.asm | 5 +- .../out/x86-64/alloc_12_u32/up.asm | 6 +- .../out/x86-64/alloc_12_u32/up_a.asm | 4 +- .../inspect-asm/out/x86-64/alloc_big/down.asm | 2 - .../out/x86-64/alloc_big/down_a.asm | 2 - .../inspect-asm/out/x86-64/alloc_big/up.asm | 3 +- .../inspect-asm/out/x86-64/alloc_big/up_a.asm | 3 +- .../out/x86-64/alloc_iter_u32/down.asm | 1 - .../out/x86-64/alloc_iter_u32/down_a.asm | 45 ++++---- .../out/x86-64/alloc_iter_u32/exact_down.asm | 64 ++++++----- .../x86-64/alloc_iter_u32/exact_down_a.asm | 70 ++++++------ .../out/x86-64/alloc_iter_u32/exact_up.asm | 2 - .../x86-64/alloc_iter_u32/try_exact_down.asm | 65 ++++++----- .../alloc_iter_u32/try_exact_down_a.asm | 71 ++++++------ .../x86-64/alloc_iter_u32/try_exact_up_a.asm | 42 ++++---- .../out/x86-64/alloc_iter_u32/up.asm | 2 - .../down.asm | 2 - .../down_big.asm | 5 +- .../alloc_overaligned_but_size_matches/up.asm | 4 +- .../inspect-asm/out/x86-64/alloc_str/down.asm | 1 - .../out/x86-64/alloc_str/down_a.asm | 1 - .../inspect-asm/out/x86-64/alloc_u32/down.asm | 2 - .../out/x86-64/alloc_u32/down_a.asm | 2 - .../inspect-asm/out/x86-64/alloc_u32/up.asm | 6 +- .../inspect-asm/out/x86-64/alloc_u32/up_a.asm | 4 +- .../out/x86-64/alloc_u32_slice/down.asm | 1 - .../out/x86-64/alloc_u32_slice/down_a.asm | 1 - .../out/x86-64/alloc_u32_slice/up.asm | 2 - .../out/x86-64/alloc_u32_slice_clone/down.asm | 1 - .../x86-64/alloc_u32_slice_clone/down_a.asm | 1 - .../out/x86-64/alloc_u32_slice_clone/up.asm | 2 - .../inspect-asm/out/x86-64/alloc_u8/down.asm | 2 - .../out/x86-64/alloc_u8/down_a.asm | 2 - .../out/x86-64/alloc_vec3/down.asm | 2 - .../out/x86-64/alloc_vec3/down_a.asm | 2 - .../inspect-asm/out/x86-64/alloc_vec3/up.asm | 6 +- .../out/x86-64/alloc_vec3/up_a.asm | 4 +- src/any_bump.rs | 5 +- src/bump_scope.rs | 68 ++++++------ src/chunk_raw.rs | 78 +++++++------- src/error_behavior.rs | 101 ++++++++++++++++++ src/{allocation_behavior.rs => layout.rs} | 0 src/lib.rs | 85 +++------------ src/with_drop.rs | 2 +- 45 files changed, 380 insertions(+), 404 deletions(-) create mode 100644 src/error_behavior.rs rename src/{allocation_behavior.rs => layout.rs} (100%) diff --git a/crates/inspect-asm/out/x86-64/alloc_12_u32/down.asm b/crates/inspect-asm/out/x86-64/alloc_12_u32/down.asm index 52619ef..75a01ad 100644 --- a/crates/inspect-asm/out/x86-64/alloc_12_u32/down.asm +++ b/crates/inspect-asm/out/x86-64/alloc_12_u32/down.asm @@ -4,12 +4,11 @@ inspect_asm::alloc_12_u32::down: mov rax, qword ptr [rcx] mov rdx, rax sub rdx, qword ptr [rcx + 8] - cmp rdx, 48 - jb .LBB0_1 + cmp rdx, 47 + jbe .LBB0_1 and rax, -4 add rax, -48 mov qword ptr [rcx], rax - je .LBB0_1 .LBB0_0: movups xmm0, xmmword ptr [rsi] movups xmm1, xmmword ptr [rsi + 16] diff --git a/crates/inspect-asm/out/x86-64/alloc_12_u32/down_a.asm b/crates/inspect-asm/out/x86-64/alloc_12_u32/down_a.asm index 53a9141..9533b44 100644 --- a/crates/inspect-asm/out/x86-64/alloc_12_u32/down_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_12_u32/down_a.asm @@ -4,11 +4,10 @@ inspect_asm::alloc_12_u32::down_a: mov rax, qword ptr [rcx] mov rdx, rax sub rdx, qword ptr [rcx + 8] - cmp rdx, 48 - jb .LBB0_1 + cmp rdx, 47 + jbe .LBB0_1 add rax, -48 mov qword ptr [rcx], rax - je .LBB0_1 .LBB0_0: movups xmm0, xmmword ptr [rsi] movups xmm1, xmmword ptr [rsi + 16] diff --git a/crates/inspect-asm/out/x86-64/alloc_12_u32/up.asm b/crates/inspect-asm/out/x86-64/alloc_12_u32/up.asm index 29749ba..93b8c1f 100644 --- a/crates/inspect-asm/out/x86-64/alloc_12_u32/up.asm +++ b/crates/inspect-asm/out/x86-64/alloc_12_u32/up.asm @@ -6,12 +6,10 @@ inspect_asm::alloc_12_u32::up: add rax, 3 and rax, -4 sub rdx, rax - cmp rdx, 48 - jb .LBB0_1 + cmp rdx, 47 + jbe .LBB0_1 lea rdx, [rax + 48] mov qword ptr [rcx], rdx - test rax, rax - je .LBB0_1 .LBB0_0: movups xmm0, xmmword ptr [rsi] movups xmm1, xmmword ptr [rsi + 16] diff --git a/crates/inspect-asm/out/x86-64/alloc_12_u32/up_a.asm b/crates/inspect-asm/out/x86-64/alloc_12_u32/up_a.asm index 81da755..8ae9ddf 100644 --- a/crates/inspect-asm/out/x86-64/alloc_12_u32/up_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_12_u32/up_a.asm @@ -4,8 +4,8 @@ inspect_asm::alloc_12_u32::up_a: mov rax, qword ptr [rcx] mov rdx, qword ptr [rcx + 8] sub rdx, rax - cmp rdx, 48 - jb .LBB0_1 + cmp rdx, 47 + jbe .LBB0_1 lea rdx, [rax + 48] mov qword ptr [rcx], rdx .LBB0_0: diff --git a/crates/inspect-asm/out/x86-64/alloc_big/down.asm b/crates/inspect-asm/out/x86-64/alloc_big/down.asm index 7eecac6..8913138 100644 --- a/crates/inspect-asm/out/x86-64/alloc_big/down.asm +++ b/crates/inspect-asm/out/x86-64/alloc_big/down.asm @@ -10,8 +10,6 @@ inspect_asm::alloc_big::down: cmp rdi, qword ptr [rcx + 8] jb .LBB0_0 mov qword ptr [rcx], rdi - test rdi, rdi - je .LBB0_0 mov edx, 512 pop rbx jmp qword ptr [rip + memcpy@GOTPCREL] diff --git a/crates/inspect-asm/out/x86-64/alloc_big/down_a.asm b/crates/inspect-asm/out/x86-64/alloc_big/down_a.asm index dc201d2..6d9fdd2 100644 --- a/crates/inspect-asm/out/x86-64/alloc_big/down_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_big/down_a.asm @@ -10,8 +10,6 @@ inspect_asm::alloc_big::down_a: cmp rdi, qword ptr [rcx + 8] jb .LBB0_0 mov qword ptr [rcx], rdi - test rdi, rdi - je .LBB0_0 mov edx, 512 pop rbx jmp qword ptr [rip + memcpy@GOTPCREL] diff --git a/crates/inspect-asm/out/x86-64/alloc_big/up.asm b/crates/inspect-asm/out/x86-64/alloc_big/up.asm index 505031f..ff20f2d 100644 --- a/crates/inspect-asm/out/x86-64/alloc_big/up.asm +++ b/crates/inspect-asm/out/x86-64/alloc_big/up.asm @@ -11,9 +11,8 @@ inspect_asm::alloc_big::up: cmovae rdx, r8 cmp rdx, qword ptr [rcx + 8] ja .LBB0_0 - mov qword ptr [rcx], rdx add rdi, 512 - je .LBB0_0 + mov qword ptr [rcx], rdx mov edx, 512 pop rbx jmp qword ptr [rip + memcpy@GOTPCREL] diff --git a/crates/inspect-asm/out/x86-64/alloc_big/up_a.asm b/crates/inspect-asm/out/x86-64/alloc_big/up_a.asm index a72d03f..2c89162 100644 --- a/crates/inspect-asm/out/x86-64/alloc_big/up_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_big/up_a.asm @@ -11,9 +11,8 @@ inspect_asm::alloc_big::up_a: cmovae rdx, r8 cmp rdx, qword ptr [rcx + 8] ja .LBB0_0 - mov qword ptr [rcx], rdx add rdi, 512 - je .LBB0_0 + mov qword ptr [rcx], rdx mov edx, 512 pop rbx jmp qword ptr [rip + memcpy@GOTPCREL] diff --git a/crates/inspect-asm/out/x86-64/alloc_iter_u32/down.asm b/crates/inspect-asm/out/x86-64/alloc_iter_u32/down.asm index 8a46810..11f05c3 100644 --- a/crates/inspect-asm/out/x86-64/alloc_iter_u32/down.asm +++ b/crates/inspect-asm/out/x86-64/alloc_iter_u32/down.asm @@ -23,7 +23,6 @@ inspect_asm::alloc_iter_u32::down: sub rax, r12 and rax, -4 mov qword ptr [rcx], rax - je .LBB0_10 .LBB0_0: mov qword ptr [rsp + 8], rax mov qword ptr [rsp + 16], 0 diff --git a/crates/inspect-asm/out/x86-64/alloc_iter_u32/down_a.asm b/crates/inspect-asm/out/x86-64/alloc_iter_u32/down_a.asm index f08c84a..0d0449b 100644 --- a/crates/inspect-asm/out/x86-64/alloc_iter_u32/down_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_iter_u32/down_a.asm @@ -6,7 +6,7 @@ inspect_asm::alloc_iter_u32::down_a: push r12 push rbx sub rsp, 40 - mov rbx, rdi + mov r14, rdi test rdx, rdx je .LBB0_5 mov rax, rdx @@ -14,7 +14,7 @@ inspect_asm::alloc_iter_u32::down_a: jne .LBB0_11 mov r15, rsi lea r12, [4*rdx] - mov rcx, qword ptr [rbx] + mov rcx, qword ptr [r14] mov rax, qword ptr [rcx] mov rsi, rax sub rsi, qword ptr [rcx + 8] @@ -22,36 +22,35 @@ inspect_asm::alloc_iter_u32::down_a: ja .LBB0_10 sub rax, r12 mov qword ptr [rcx], rax - je .LBB0_10 .LBB0_0: mov qword ptr [rsp + 8], rax mov qword ptr [rsp + 16], 0 mov qword ptr [rsp + 24], rdx - mov qword ptr [rsp + 32], rbx + mov qword ptr [rsp + 32], r14 xor r13d, r13d - lea rbx, [rsp + 8] - xor r14d, r14d + lea r14, [rsp + 8] + xor ebx, ebx jmp .LBB0_2 .LBB0_1: - mov dword ptr [rax + 4*r14], ebp - inc r14 - mov qword ptr [rsp + 16], r14 + mov dword ptr [rax + 4*rbx], ebp + inc rbx + mov qword ptr [rsp + 16], rbx add r13, 4 cmp r12, r13 je .LBB0_3 .LBB0_2: mov ebp, dword ptr [r15 + r13] - cmp qword ptr [rsp + 24], r14 + cmp qword ptr [rsp + 24], rbx jne .LBB0_1 - mov rdi, rbx + mov rdi, r14 call bump_scope::bump_vec::BumpVec::generic_grow_cold mov rax, qword ptr [rsp + 8] - mov r14, qword ptr [rsp + 16] + mov rbx, qword ptr [rsp + 16] jmp .LBB0_1 .LBB0_3: mov rsi, qword ptr [rsp + 8] - mov rbx, qword ptr [rsp + 32] - mov rax, qword ptr [rbx] + mov r14, qword ptr [rsp + 32] + mov rax, qword ptr [r14] cmp rsi, qword ptr [rax] je .LBB0_6 .LBB0_4: @@ -60,12 +59,12 @@ inspect_asm::alloc_iter_u32::down_a: .LBB0_5: mov qword ptr [rsp + 24], rdx mov esi, 4 - xor r14d, r14d - mov rax, qword ptr [rbx] + xor ebx, ebx + mov rax, qword ptr [r14] cmp rsi, qword ptr [rax] jne .LBB0_4 .LBB0_6: - lea rdx, [4*r14] + lea rdx, [4*rbx] mov rax, qword ptr [rsp + 24] lea rax, [rsi + 4*rax] xor edi, edi @@ -81,12 +80,12 @@ inspect_asm::alloc_iter_u32::down_a: .LBB0_7: call qword ptr [rip + memcpy@GOTPCREL] .LBB0_8: - mov rcx, qword ptr [rbx] + mov rcx, qword ptr [r14] mov rax, r15 mov qword ptr [rcx], r15 - mov qword ptr [rsp + 24], r14 + mov qword ptr [rsp + 24], rbx .LBB0_9: - mov rdx, r14 + mov rdx, rbx add rsp, 40 pop rbx pop r12 @@ -96,11 +95,11 @@ inspect_asm::alloc_iter_u32::down_a: pop rbp ret .LBB0_10: - mov rdi, rbx + mov rdi, r14 mov rsi, rdx - mov r14, rdx + mov rbx, rdx call bump_scope::bump_scope::BumpScope::do_alloc_slice_in_another_chunk - mov rdx, r14 + mov rdx, rbx jmp .LBB0_0 .LBB0_11: call qword ptr [rip + bump_scope::private::capacity_overflow@GOTPCREL] diff --git a/crates/inspect-asm/out/x86-64/alloc_iter_u32/exact_down.asm b/crates/inspect-asm/out/x86-64/alloc_iter_u32/exact_down.asm index 0bec45a..ec449f0 100644 --- a/crates/inspect-asm/out/x86-64/alloc_iter_u32/exact_down.asm +++ b/crates/inspect-asm/out/x86-64/alloc_iter_u32/exact_down.asm @@ -1,55 +1,53 @@ inspect_asm::alloc_iter_u32::exact_down: + push r15 push r14 push rbx - push rax mov rax, rdx shr rax, 61 jne .LBB0_7 - lea r8, [4*rdx] + lea rbx, [4*rdx] mov rcx, qword ptr [rdi] mov rax, qword ptr [rcx] - mov r9, rax - sub r9, qword ptr [rcx + 8] - cmp r8, r9 + mov r8, rax + sub r8, qword ptr [rcx + 8] + cmp rbx, r8 ja .LBB0_5 - sub rax, r8 + sub rax, rbx and rax, -4 mov qword ptr [rcx], rax - je .LBB0_5 test rdx, rdx je .LBB0_4 .LBB0_0: - lea rcx, [rdx - 1] - movabs r8, 4611686018427387903 - and r8, rcx - cmp rdx, r8 - cmovb r8, rdx + add rbx, -4 + shr rbx, 2 + cmp rdx, rbx + cmovb rbx, rdx mov rcx, rsi mov rdi, rax - cmp r8, 8 + cmp rbx, 8 jb .LBB0_2 - mov r9, rax - sub r9, rsi + mov r8, rax + sub r8, rsi mov rcx, rsi mov rdi, rax - cmp r9, 31 + cmp r8, 31 jbe .LBB0_2 - inc r8 - mov ecx, r8d + inc rbx + mov ecx, ebx and ecx, 7 mov edi, 8 cmovne rdi, rcx - sub r8, rdi - lea rcx, [rsi + 4*r8] - lea rdi, [rax + 4*r8] - xor r9d, r9d + sub rbx, rdi + lea rcx, [rsi + 4*rbx] + lea rdi, [rax + 4*rbx] + xor r8d, r8d .LBB0_1: - movups xmm0, xmmword ptr [rsi + 4*r9] - movups xmm1, xmmword ptr [rsi + 4*r9 + 16] - movups xmmword ptr [rax + 4*r9], xmm0 - movups xmmword ptr [rax + 4*r9 + 16], xmm1 - add r9, 8 - cmp r8, r9 + movups xmm0, xmmword ptr [rsi + 4*r8] + movups xmm1, xmmword ptr [rsi + 4*r8 + 16] + movups xmmword ptr [rax + 4*r8], xmm0 + movups xmmword ptr [rax + 4*r8 + 16], xmm1 + add r8, 8 + cmp rbx, r8 jne .LBB0_1 .LBB0_2: lea rsi, [rsi + 4*rdx] @@ -64,17 +62,17 @@ inspect_asm::alloc_iter_u32::exact_down: cmp rdi, r8 jne .LBB0_3 .LBB0_4: - add rsp, 8 pop rbx pop r14 + pop r15 ret .LBB0_5: - mov rbx, rsi + mov r14, rsi mov rsi, rdx - mov r14, rdx + mov r15, rdx call bump_scope::bump_scope::BumpScope::do_alloc_slice_in_another_chunk - mov rsi, rbx - mov rdx, r14 + mov rsi, r14 + mov rdx, r15 test rdx, rdx jne .LBB0_0 jmp .LBB0_4 diff --git a/crates/inspect-asm/out/x86-64/alloc_iter_u32/exact_down_a.asm b/crates/inspect-asm/out/x86-64/alloc_iter_u32/exact_down_a.asm index be6c14e..10866d3 100644 --- a/crates/inspect-asm/out/x86-64/alloc_iter_u32/exact_down_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_iter_u32/exact_down_a.asm @@ -1,54 +1,52 @@ inspect_asm::alloc_iter_u32::exact_down_a: + push r15 push r14 push rbx - push rax mov rax, rdx shr rax, 61 jne .LBB0_7 - lea rcx, [4*rdx] - mov r8, qword ptr [rdi] - mov rax, qword ptr [r8] - mov r9, rax - sub r9, qword ptr [r8 + 8] - cmp rcx, r9 + lea rbx, [4*rdx] + mov rcx, qword ptr [rdi] + mov rax, qword ptr [rcx] + mov r8, rax + sub r8, qword ptr [rcx + 8] + cmp rbx, r8 ja .LBB0_5 - sub rax, rcx - mov qword ptr [r8], rax - je .LBB0_5 + sub rax, rbx + mov qword ptr [rcx], rax test rdx, rdx je .LBB0_4 .LBB0_0: - lea rcx, [rdx - 1] - movabs r8, 4611686018427387903 - and r8, rcx - cmp rdx, r8 - cmovb r8, rdx + add rbx, -4 + shr rbx, 2 + cmp rdx, rbx + cmovb rbx, rdx mov rcx, rsi mov rdi, rax - cmp r8, 8 + cmp rbx, 8 jb .LBB0_2 - mov r9, rax - sub r9, rsi + mov r8, rax + sub r8, rsi mov rcx, rsi mov rdi, rax - cmp r9, 31 + cmp r8, 31 jbe .LBB0_2 - inc r8 - mov ecx, r8d + inc rbx + mov ecx, ebx and ecx, 7 mov edi, 8 cmovne rdi, rcx - sub r8, rdi - lea rcx, [rsi + 4*r8] - lea rdi, [rax + 4*r8] - xor r9d, r9d + sub rbx, rdi + lea rcx, [rsi + 4*rbx] + lea rdi, [rax + 4*rbx] + xor r8d, r8d .LBB0_1: - movups xmm0, xmmword ptr [rsi + 4*r9] - movups xmm1, xmmword ptr [rsi + 4*r9 + 16] - movups xmmword ptr [rax + 4*r9], xmm0 - movups xmmword ptr [rax + 4*r9 + 16], xmm1 - add r9, 8 - cmp r8, r9 + movups xmm0, xmmword ptr [rsi + 4*r8] + movups xmm1, xmmword ptr [rsi + 4*r8 + 16] + movups xmmword ptr [rax + 4*r8], xmm0 + movups xmmword ptr [rax + 4*r8 + 16], xmm1 + add r8, 8 + cmp rbx, r8 jne .LBB0_1 .LBB0_2: lea rsi, [rsi + 4*rdx] @@ -63,17 +61,17 @@ inspect_asm::alloc_iter_u32::exact_down_a: cmp rdi, r8 jne .LBB0_3 .LBB0_4: - add rsp, 8 pop rbx pop r14 + pop r15 ret .LBB0_5: - mov rbx, rsi + mov r14, rsi mov rsi, rdx - mov r14, rdx + mov r15, rdx call bump_scope::bump_scope::BumpScope::do_alloc_slice_in_another_chunk - mov rsi, rbx - mov rdx, r14 + mov rsi, r14 + mov rdx, r15 test rdx, rdx jne .LBB0_0 jmp .LBB0_4 diff --git a/crates/inspect-asm/out/x86-64/alloc_iter_u32/exact_up.asm b/crates/inspect-asm/out/x86-64/alloc_iter_u32/exact_up.asm index 48e433b..a86cb00 100644 --- a/crates/inspect-asm/out/x86-64/alloc_iter_u32/exact_up.asm +++ b/crates/inspect-asm/out/x86-64/alloc_iter_u32/exact_up.asm @@ -16,8 +16,6 @@ inspect_asm::alloc_iter_u32::exact_up: ja .LBB0_5 add rcx, rax mov qword ptr [r8], rcx - test rax, rax - je .LBB0_5 test rdx, rdx je .LBB0_4 .LBB0_0: diff --git a/crates/inspect-asm/out/x86-64/alloc_iter_u32/try_exact_down.asm b/crates/inspect-asm/out/x86-64/alloc_iter_u32/try_exact_down.asm index 1b2a6ac..72351bc 100644 --- a/crates/inspect-asm/out/x86-64/alloc_iter_u32/try_exact_down.asm +++ b/crates/inspect-asm/out/x86-64/alloc_iter_u32/try_exact_down.asm @@ -1,62 +1,61 @@ inspect_asm::alloc_iter_u32::try_exact_down: + push r15 push r14 push rbx - push rax mov rax, rdx shr rax, 61 je .LBB0_1 xor eax, eax .LBB0_0: - add rsp, 8 pop rbx pop r14 + pop r15 ret .LBB0_1: - lea r8, [4*rdx] + lea rbx, [4*rdx] mov rcx, qword ptr [rdi] mov rax, qword ptr [rcx] - mov r9, rax - sub r9, qword ptr [rcx + 8] - cmp r8, r9 + mov r8, rax + sub r8, qword ptr [rcx + 8] + cmp rbx, r8 ja .LBB0_6 - sub rax, r8 + sub rax, rbx and rax, -4 mov qword ptr [rcx], rax je .LBB0_6 .LBB0_2: test rdx, rdx je .LBB0_0 - lea rcx, [rdx - 1] - movabs r8, 4611686018427387903 - and r8, rcx - cmp rdx, r8 - cmovb r8, rdx + add rbx, -4 + shr rbx, 2 + cmp rdx, rbx + cmovb rbx, rdx mov rcx, rax mov rdi, rsi - cmp r8, 8 + cmp rbx, 8 jb .LBB0_4 - mov r9, rax - sub r9, rsi + mov r8, rax + sub r8, rsi mov rcx, rax mov rdi, rsi - cmp r9, 32 + cmp r8, 32 jb .LBB0_4 - inc r8 - mov ecx, r8d + inc rbx + mov ecx, ebx and ecx, 7 mov edi, 8 cmovne rdi, rcx - sub r8, rdi - lea rcx, [rax + 4*r8] - lea rdi, [rsi + 4*r8] - xor r9d, r9d + sub rbx, rdi + lea rcx, [rax + 4*rbx] + lea rdi, [rsi + 4*rbx] + xor r8d, r8d .LBB0_3: - movups xmm0, xmmword ptr [rsi + 4*r9] - movups xmm1, xmmword ptr [rsi + 4*r9 + 16] - movups xmmword ptr [rax + 4*r9], xmm0 - movups xmmword ptr [rax + 4*r9 + 16], xmm1 - add r9, 8 - cmp r8, r9 + movups xmm0, xmmword ptr [rsi + 4*r8] + movups xmm1, xmmword ptr [rsi + 4*r8 + 16] + movups xmmword ptr [rax + 4*r8], xmm0 + movups xmmword ptr [rax + 4*r8 + 16], xmm1 + add r8, 8 + cmp rbx, r8 jne .LBB0_3 .LBB0_4: lea rsi, [rsi + 4*rdx] @@ -72,18 +71,18 @@ inspect_asm::alloc_iter_u32::try_exact_down: jne .LBB0_5 jmp .LBB0_0 .LBB0_6: - mov rbx, rsi + mov r14, rsi mov rsi, rdx - mov r14, rdx + mov r15, rdx call bump_scope::bump_scope::BumpScope::do_alloc_slice_in_another_chunk - mov rsi, rbx - mov rdx, r14 + mov rsi, r14 + mov rdx, r15 test rax, rax jne .LBB0_2 xor eax, eax - add rsp, 8 pop rbx pop r14 + pop r15 ret .LBB0_7: call qword ptr [rip + bump_scope::exact_size_iterator_bad_len@GOTPCREL] diff --git a/crates/inspect-asm/out/x86-64/alloc_iter_u32/try_exact_down_a.asm b/crates/inspect-asm/out/x86-64/alloc_iter_u32/try_exact_down_a.asm index e9dac63..e930501 100644 --- a/crates/inspect-asm/out/x86-64/alloc_iter_u32/try_exact_down_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_iter_u32/try_exact_down_a.asm @@ -1,61 +1,60 @@ inspect_asm::alloc_iter_u32::try_exact_down_a: + push r15 push r14 push rbx - push rax mov rax, rdx shr rax, 61 je .LBB0_1 xor eax, eax .LBB0_0: - add rsp, 8 pop rbx pop r14 + pop r15 ret .LBB0_1: - lea rcx, [4*rdx] - mov r8, qword ptr [rdi] - mov rax, qword ptr [r8] - mov r9, rax - sub r9, qword ptr [r8 + 8] - cmp rcx, r9 + lea rbx, [4*rdx] + mov rcx, qword ptr [rdi] + mov rax, qword ptr [rcx] + mov r8, rax + sub r8, qword ptr [rcx + 8] + cmp rbx, r8 ja .LBB0_6 - sub rax, rcx - mov qword ptr [r8], rax + sub rax, rbx + mov qword ptr [rcx], rax je .LBB0_6 .LBB0_2: test rdx, rdx je .LBB0_0 - lea rcx, [rdx - 1] - movabs r8, 4611686018427387903 - and r8, rcx - cmp rdx, r8 - cmovb r8, rdx + add rbx, -4 + shr rbx, 2 + cmp rdx, rbx + cmovb rbx, rdx mov rcx, rax mov rdi, rsi - cmp r8, 8 + cmp rbx, 8 jb .LBB0_4 - mov r9, rax - sub r9, rsi + mov r8, rax + sub r8, rsi mov rcx, rax mov rdi, rsi - cmp r9, 32 + cmp r8, 32 jb .LBB0_4 - inc r8 - mov ecx, r8d + inc rbx + mov ecx, ebx and ecx, 7 mov edi, 8 cmovne rdi, rcx - sub r8, rdi - lea rcx, [rax + 4*r8] - lea rdi, [rsi + 4*r8] - xor r9d, r9d + sub rbx, rdi + lea rcx, [rax + 4*rbx] + lea rdi, [rsi + 4*rbx] + xor r8d, r8d .LBB0_3: - movups xmm0, xmmword ptr [rsi + 4*r9] - movups xmm1, xmmword ptr [rsi + 4*r9 + 16] - movups xmmword ptr [rax + 4*r9], xmm0 - movups xmmword ptr [rax + 4*r9 + 16], xmm1 - add r9, 8 - cmp r8, r9 + movups xmm0, xmmword ptr [rsi + 4*r8] + movups xmm1, xmmword ptr [rsi + 4*r8 + 16] + movups xmmword ptr [rax + 4*r8], xmm0 + movups xmmword ptr [rax + 4*r8 + 16], xmm1 + add r8, 8 + cmp rbx, r8 jne .LBB0_3 .LBB0_4: lea rsi, [rsi + 4*rdx] @@ -71,18 +70,18 @@ inspect_asm::alloc_iter_u32::try_exact_down_a: jne .LBB0_5 jmp .LBB0_0 .LBB0_6: - mov rbx, rsi + mov r14, rsi mov rsi, rdx - mov r14, rdx + mov r15, rdx call bump_scope::bump_scope::BumpScope::do_alloc_slice_in_another_chunk - mov rsi, rbx - mov rdx, r14 + mov rsi, r14 + mov rdx, r15 test rax, rax jne .LBB0_2 xor eax, eax - add rsp, 8 pop rbx pop r14 + pop r15 ret .LBB0_7: call qword ptr [rip + bump_scope::exact_size_iterator_bad_len@GOTPCREL] diff --git a/crates/inspect-asm/out/x86-64/alloc_iter_u32/try_exact_up_a.asm b/crates/inspect-asm/out/x86-64/alloc_iter_u32/try_exact_up_a.asm index 7b5b7c4..06055e2 100644 --- a/crates/inspect-asm/out/x86-64/alloc_iter_u32/try_exact_up_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_iter_u32/try_exact_up_a.asm @@ -4,19 +4,26 @@ inspect_asm::alloc_iter_u32::try_exact_up_a: push rax mov rax, rdx shr rax, 61 - jne .LBB0_6 + je .LBB0_1 + xor eax, eax +.LBB0_0: + add rsp, 8 + pop rbx + pop r14 + ret +.LBB0_1: lea rcx, [4*rdx] mov r8, qword ptr [rdi] mov rax, qword ptr [r8] mov r9, qword ptr [r8 + 8] sub r9, rax cmp rcx, r9 - ja .LBB0_5 + ja .LBB0_6 add rcx, rax mov qword ptr [r8], rcx -.LBB0_0: +.LBB0_2: test rdx, rdx - je .LBB0_4 + je .LBB0_0 lea rcx, [rdx - 1] movabs r8, 4611686018427387903 and r8, rcx @@ -25,13 +32,13 @@ inspect_asm::alloc_iter_u32::try_exact_up_a: mov rcx, rax mov rdi, rsi cmp r8, 8 - jb .LBB0_2 + jb .LBB0_4 mov r9, rax sub r9, rsi mov rcx, rax mov rdi, rsi cmp r9, 32 - jb .LBB0_2 + jb .LBB0_4 inc r8 mov ecx, r8d and ecx, 7 @@ -41,18 +48,18 @@ inspect_asm::alloc_iter_u32::try_exact_up_a: lea rcx, [rax + 4*r8] lea rdi, [rsi + 4*r8] xor r9d, r9d -.LBB0_1: +.LBB0_3: movups xmm0, xmmword ptr [rsi + 4*r9] movups xmm1, xmmword ptr [rsi + 4*r9 + 16] movups xmmword ptr [rax + 4*r9], xmm0 movups xmmword ptr [rax + 4*r9 + 16], xmm1 add r9, 8 cmp r8, r9 - jne .LBB0_1 -.LBB0_2: + jne .LBB0_3 +.LBB0_4: lea rsi, [rsi + 4*rdx] lea r8, [rax + 4*rdx] -.LBB0_3: +.LBB0_5: cmp rdi, rsi je .LBB0_7 mov r9d, dword ptr [rdi] @@ -60,15 +67,9 @@ inspect_asm::alloc_iter_u32::try_exact_up_a: mov dword ptr [rcx], r9d add rcx, 4 cmp rcx, r8 - jne .LBB0_3 -.LBB0_4: - test rax, rax - je .LBB0_6 - add rsp, 8 - pop rbx - pop r14 - ret -.LBB0_5: + jne .LBB0_5 + jmp .LBB0_0 +.LBB0_6: mov rbx, rsi mov rsi, rdx mov r14, rdx @@ -76,8 +77,7 @@ inspect_asm::alloc_iter_u32::try_exact_up_a: mov rsi, rbx mov rdx, r14 test rax, rax - jne .LBB0_0 -.LBB0_6: + jne .LBB0_2 xor eax, eax add rsp, 8 pop rbx diff --git a/crates/inspect-asm/out/x86-64/alloc_iter_u32/up.asm b/crates/inspect-asm/out/x86-64/alloc_iter_u32/up.asm index d739f69..2189366 100644 --- a/crates/inspect-asm/out/x86-64/alloc_iter_u32/up.asm +++ b/crates/inspect-asm/out/x86-64/alloc_iter_u32/up.asm @@ -22,8 +22,6 @@ inspect_asm::alloc_iter_u32::up: ja .LBB0_7 lea rsi, [rax + r15] mov qword ptr [rcx], rsi - test rax, rax - je .LBB0_7 .LBB0_0: mov qword ptr [rsp], rax mov qword ptr [rsp + 8], 0 diff --git a/crates/inspect-asm/out/x86-64/alloc_overaligned_but_size_matches/down.asm b/crates/inspect-asm/out/x86-64/alloc_overaligned_but_size_matches/down.asm index 0d8a25c..9f83560 100644 --- a/crates/inspect-asm/out/x86-64/alloc_overaligned_but_size_matches/down.asm +++ b/crates/inspect-asm/out/x86-64/alloc_overaligned_but_size_matches/down.asm @@ -6,8 +6,6 @@ inspect_asm::alloc_overaligned_but_size_matches::down: cmp rax, qword ptr [rcx + 8] jb .LBB0_0 mov qword ptr [rcx], rax - test rax, rax - je .LBB0_0 mov dword ptr [rax], esi pop rbx ret diff --git a/crates/inspect-asm/out/x86-64/alloc_overaligned_but_size_matches/down_big.asm b/crates/inspect-asm/out/x86-64/alloc_overaligned_but_size_matches/down_big.asm index 5cbce4b..09988c0 100644 --- a/crates/inspect-asm/out/x86-64/alloc_overaligned_but_size_matches/down_big.asm +++ b/crates/inspect-asm/out/x86-64/alloc_overaligned_but_size_matches/down_big.asm @@ -4,11 +4,10 @@ inspect_asm::alloc_overaligned_but_size_matches::down_big: mov rax, qword ptr [rcx] mov rdx, rax sub rdx, qword ptr [rcx + 8] - cmp rdx, 40 - jb .LBB0_1 + cmp rdx, 39 + jbe .LBB0_1 add rax, -40 mov qword ptr [rcx], rax - je .LBB0_1 .LBB0_0: mov rcx, qword ptr [rsi + 32] mov qword ptr [rax + 32], rcx diff --git a/crates/inspect-asm/out/x86-64/alloc_overaligned_but_size_matches/up.asm b/crates/inspect-asm/out/x86-64/alloc_overaligned_but_size_matches/up.asm index 7136ae5..8516be7 100644 --- a/crates/inspect-asm/out/x86-64/alloc_overaligned_but_size_matches/up.asm +++ b/crates/inspect-asm/out/x86-64/alloc_overaligned_but_size_matches/up.asm @@ -4,8 +4,8 @@ inspect_asm::alloc_overaligned_but_size_matches::up: mov rax, qword ptr [rcx] mov rdx, qword ptr [rcx + 8] sub rdx, rax - cmp rdx, 4 - jb .LBB0_0 + cmp rdx, 3 + jbe .LBB0_0 lea rdx, [rax + 4] mov qword ptr [rcx], rdx mov dword ptr [rax], esi diff --git a/crates/inspect-asm/out/x86-64/alloc_str/down.asm b/crates/inspect-asm/out/x86-64/alloc_str/down.asm index a0f628f..7bfe302 100644 --- a/crates/inspect-asm/out/x86-64/alloc_str/down.asm +++ b/crates/inspect-asm/out/x86-64/alloc_str/down.asm @@ -11,7 +11,6 @@ inspect_asm::alloc_str::down: jb .LBB0_1 sub rbx, r14 mov qword ptr [rax], rbx - je .LBB0_1 .LBB0_0: mov rdi, rbx mov rdx, r14 diff --git a/crates/inspect-asm/out/x86-64/alloc_str/down_a.asm b/crates/inspect-asm/out/x86-64/alloc_str/down_a.asm index 8aa34b7..0aa9cdd 100644 --- a/crates/inspect-asm/out/x86-64/alloc_str/down_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_str/down_a.asm @@ -12,7 +12,6 @@ inspect_asm::alloc_str::down_a: sub rbx, r14 and rbx, -4 mov qword ptr [rax], rbx - je .LBB0_1 .LBB0_0: mov rdi, rbx mov rdx, r14 diff --git a/crates/inspect-asm/out/x86-64/alloc_u32/down.asm b/crates/inspect-asm/out/x86-64/alloc_u32/down.asm index 3cb5105..97414f9 100644 --- a/crates/inspect-asm/out/x86-64/alloc_u32/down.asm +++ b/crates/inspect-asm/out/x86-64/alloc_u32/down.asm @@ -7,8 +7,6 @@ inspect_asm::alloc_u32::down: cmp rax, qword ptr [rcx + 8] jb .LBB0_0 mov qword ptr [rcx], rax - test rax, rax - je .LBB0_0 mov dword ptr [rax], esi pop rbx ret diff --git a/crates/inspect-asm/out/x86-64/alloc_u32/down_a.asm b/crates/inspect-asm/out/x86-64/alloc_u32/down_a.asm index 07eb6c7..bf8d93e 100644 --- a/crates/inspect-asm/out/x86-64/alloc_u32/down_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_u32/down_a.asm @@ -6,8 +6,6 @@ inspect_asm::alloc_u32::down_a: cmp rax, qword ptr [rcx + 8] jb .LBB0_0 mov qword ptr [rcx], rax - test rax, rax - je .LBB0_0 mov dword ptr [rax], esi pop rbx ret diff --git a/crates/inspect-asm/out/x86-64/alloc_u32/up.asm b/crates/inspect-asm/out/x86-64/alloc_u32/up.asm index ac753aa..92bc060 100644 --- a/crates/inspect-asm/out/x86-64/alloc_u32/up.asm +++ b/crates/inspect-asm/out/x86-64/alloc_u32/up.asm @@ -6,12 +6,10 @@ inspect_asm::alloc_u32::up: add rax, 3 and rax, -4 sub rdx, rax - cmp rdx, 4 - jb .LBB0_0 + cmp rdx, 3 + jbe .LBB0_0 lea rdx, [rax + 4] mov qword ptr [rcx], rdx - test rax, rax - je .LBB0_0 mov dword ptr [rax], esi pop rbx ret diff --git a/crates/inspect-asm/out/x86-64/alloc_u32/up_a.asm b/crates/inspect-asm/out/x86-64/alloc_u32/up_a.asm index 6d2ae6b..ed686ea 100644 --- a/crates/inspect-asm/out/x86-64/alloc_u32/up_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_u32/up_a.asm @@ -4,8 +4,8 @@ inspect_asm::alloc_u32::up_a: mov rax, qword ptr [rcx] mov rdx, qword ptr [rcx + 8] sub rdx, rax - cmp rdx, 4 - jb .LBB0_0 + cmp rdx, 3 + jbe .LBB0_0 lea rdx, [rax + 4] mov qword ptr [rcx], rdx mov dword ptr [rax], esi diff --git a/crates/inspect-asm/out/x86-64/alloc_u32_slice/down.asm b/crates/inspect-asm/out/x86-64/alloc_u32_slice/down.asm index fe02e4e..a19cc9a 100644 --- a/crates/inspect-asm/out/x86-64/alloc_u32_slice/down.asm +++ b/crates/inspect-asm/out/x86-64/alloc_u32_slice/down.asm @@ -13,7 +13,6 @@ inspect_asm::alloc_u32_slice::down: sub r14, rdx and r14, -4 mov qword ptr [rax], r14 - je .LBB0_1 .LBB0_0: mov rdi, r14 call qword ptr [rip + memcpy@GOTPCREL] diff --git a/crates/inspect-asm/out/x86-64/alloc_u32_slice/down_a.asm b/crates/inspect-asm/out/x86-64/alloc_u32_slice/down_a.asm index 9471114..3259c06 100644 --- a/crates/inspect-asm/out/x86-64/alloc_u32_slice/down_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_u32_slice/down_a.asm @@ -12,7 +12,6 @@ inspect_asm::alloc_u32_slice::down_a: ja .LBB0_1 sub r14, rdx mov qword ptr [rax], r14 - je .LBB0_1 .LBB0_0: mov rdi, r14 call qword ptr [rip + memcpy@GOTPCREL] diff --git a/crates/inspect-asm/out/x86-64/alloc_u32_slice/up.asm b/crates/inspect-asm/out/x86-64/alloc_u32_slice/up.asm index eb4013a..f614cf6 100644 --- a/crates/inspect-asm/out/x86-64/alloc_u32_slice/up.asm +++ b/crates/inspect-asm/out/x86-64/alloc_u32_slice/up.asm @@ -14,8 +14,6 @@ inspect_asm::alloc_u32_slice::up: ja .LBB0_1 lea rcx, [r14 + rdx] mov qword ptr [rax], rcx - test r14, r14 - je .LBB0_1 .LBB0_0: mov rdi, r14 call qword ptr [rip + memcpy@GOTPCREL] diff --git a/crates/inspect-asm/out/x86-64/alloc_u32_slice_clone/down.asm b/crates/inspect-asm/out/x86-64/alloc_u32_slice_clone/down.asm index 35f95b0..7db37f6 100644 --- a/crates/inspect-asm/out/x86-64/alloc_u32_slice_clone/down.asm +++ b/crates/inspect-asm/out/x86-64/alloc_u32_slice_clone/down.asm @@ -12,7 +12,6 @@ inspect_asm::alloc_u32_slice_clone::down: sub r14, rbx and r14, -4 mov qword ptr [rax], r14 - je .LBB0_2 test rdx, rdx je .LBB0_1 .LBB0_0: diff --git a/crates/inspect-asm/out/x86-64/alloc_u32_slice_clone/down_a.asm b/crates/inspect-asm/out/x86-64/alloc_u32_slice_clone/down_a.asm index a2b89e6..68e8c2c 100644 --- a/crates/inspect-asm/out/x86-64/alloc_u32_slice_clone/down_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_u32_slice_clone/down_a.asm @@ -11,7 +11,6 @@ inspect_asm::alloc_u32_slice_clone::down_a: ja .LBB0_2 sub r14, rbx mov qword ptr [rax], r14 - je .LBB0_2 test rdx, rdx je .LBB0_1 .LBB0_0: diff --git a/crates/inspect-asm/out/x86-64/alloc_u32_slice_clone/up.asm b/crates/inspect-asm/out/x86-64/alloc_u32_slice_clone/up.asm index 032be33..e87e71e 100644 --- a/crates/inspect-asm/out/x86-64/alloc_u32_slice_clone/up.asm +++ b/crates/inspect-asm/out/x86-64/alloc_u32_slice_clone/up.asm @@ -13,8 +13,6 @@ inspect_asm::alloc_u32_slice_clone::up: ja .LBB0_2 lea rcx, [r14 + rbx] mov qword ptr [rax], rcx - test r14, r14 - je .LBB0_2 test rdx, rdx je .LBB0_1 .LBB0_0: diff --git a/crates/inspect-asm/out/x86-64/alloc_u8/down.asm b/crates/inspect-asm/out/x86-64/alloc_u8/down.asm index a667f66..8b0e0cf 100644 --- a/crates/inspect-asm/out/x86-64/alloc_u8/down.asm +++ b/crates/inspect-asm/out/x86-64/alloc_u8/down.asm @@ -6,8 +6,6 @@ inspect_asm::alloc_u8::down: cmp rax, qword ptr [rcx + 8] jb .LBB0_0 mov qword ptr [rcx], rax - test rax, rax - je .LBB0_0 mov byte ptr [rax], sil pop rbx ret diff --git a/crates/inspect-asm/out/x86-64/alloc_u8/down_a.asm b/crates/inspect-asm/out/x86-64/alloc_u8/down_a.asm index bd0aca7..1f865fe 100644 --- a/crates/inspect-asm/out/x86-64/alloc_u8/down_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_u8/down_a.asm @@ -7,8 +7,6 @@ inspect_asm::alloc_u8::down_a: cmp rax, qword ptr [rcx + 8] jb .LBB0_0 mov qword ptr [rcx], rax - test rax, rax - je .LBB0_0 mov byte ptr [rax], sil pop rbx ret diff --git a/crates/inspect-asm/out/x86-64/alloc_vec3/down.asm b/crates/inspect-asm/out/x86-64/alloc_vec3/down.asm index 6749f58..9089a4f 100644 --- a/crates/inspect-asm/out/x86-64/alloc_vec3/down.asm +++ b/crates/inspect-asm/out/x86-64/alloc_vec3/down.asm @@ -7,8 +7,6 @@ inspect_asm::alloc_vec3::down: cmp rax, qword ptr [rcx + 8] jb .LBB0_1 mov qword ptr [rcx], rax - test rax, rax - je .LBB0_1 .LBB0_0: mov ecx, dword ptr [rsi + 8] mov dword ptr [rax + 8], ecx diff --git a/crates/inspect-asm/out/x86-64/alloc_vec3/down_a.asm b/crates/inspect-asm/out/x86-64/alloc_vec3/down_a.asm index 3631662..c92202f 100644 --- a/crates/inspect-asm/out/x86-64/alloc_vec3/down_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_vec3/down_a.asm @@ -6,8 +6,6 @@ inspect_asm::alloc_vec3::down_a: cmp rax, qword ptr [rcx + 8] jb .LBB0_1 mov qword ptr [rcx], rax - test rax, rax - je .LBB0_1 .LBB0_0: mov ecx, dword ptr [rsi + 8] mov dword ptr [rax + 8], ecx diff --git a/crates/inspect-asm/out/x86-64/alloc_vec3/up.asm b/crates/inspect-asm/out/x86-64/alloc_vec3/up.asm index 9b782d1..51ba647 100644 --- a/crates/inspect-asm/out/x86-64/alloc_vec3/up.asm +++ b/crates/inspect-asm/out/x86-64/alloc_vec3/up.asm @@ -6,12 +6,10 @@ inspect_asm::alloc_vec3::up: add rax, 3 and rax, -4 sub rdx, rax - cmp rdx, 12 - jb .LBB0_1 + cmp rdx, 11 + jbe .LBB0_1 lea rdx, [rax + 12] mov qword ptr [rcx], rdx - test rax, rax - je .LBB0_1 .LBB0_0: mov ecx, dword ptr [rsi + 8] mov dword ptr [rax + 8], ecx diff --git a/crates/inspect-asm/out/x86-64/alloc_vec3/up_a.asm b/crates/inspect-asm/out/x86-64/alloc_vec3/up_a.asm index 6a38814..d445400 100644 --- a/crates/inspect-asm/out/x86-64/alloc_vec3/up_a.asm +++ b/crates/inspect-asm/out/x86-64/alloc_vec3/up_a.asm @@ -4,8 +4,8 @@ inspect_asm::alloc_vec3::up_a: mov rax, qword ptr [rcx] mov rdx, qword ptr [rcx + 8] sub rdx, rax - cmp rdx, 12 - jb .LBB0_1 + cmp rdx, 11 + jbe .LBB0_1 lea rdx, [rax + 12] mov qword ptr [rcx], rdx .LBB0_0: diff --git a/src/any_bump.rs b/src/any_bump.rs index 7e080d5..15561ce 100644 --- a/src/any_bump.rs +++ b/src/any_bump.rs @@ -4,8 +4,7 @@ use core::{alloc::Layout, mem::MaybeUninit, ptr::NonNull}; use core::fmt; use crate::{ - allocation_behavior::LayoutProps, BaseAllocator, Bump, BumpBox, BumpScope, ErrorBehavior, MinimumAlignment, - SupportedMinimumAlignment, + layout::LayoutProps, BaseAllocator, Bump, BumpBox, BumpScope, ErrorBehavior, MinimumAlignment, SupportedMinimumAlignment, }; pub(crate) trait Sealed { @@ -194,7 +193,7 @@ where #[inline(always)] fn alloc_in_current_chunk(&self, layout: impl LayoutProps) -> Option> { - self.chunk.get().alloc::(layout) + self.chunk.get().alloc(MinimumAlignment::, layout) } #[inline(always)] diff --git a/src/bump_scope.rs b/src/bump_scope.rs index d67f117..995010a 100644 --- a/src/bump_scope.rs +++ b/src/bump_scope.rs @@ -13,11 +13,11 @@ use core::{ use allocator_api2::alloc::AllocError; use crate::{ - allocation_behavior::{ArrayLayout, CustomLayout, LayoutProps, SizedLayout}, bump_align_guard::BumpAlignGuard, bump_common_methods, bump_scope_methods, chunk_size::ChunkSize, const_param_assert, doc_align_cant_decrease, + layout::{ArrayLayout, CustomLayout, LayoutProps, SizedLayout}, polyfill::{nonnull, pointer}, BaseAllocator, BumpScopeGuard, Checkpoint, ErrorBehavior, GuaranteedAllocatedStats, MinimumAlignment, RawChunk, SizedTypeProperties, Stats, SupportedMinimumAlignment, WithoutDealloc, WithoutShrink, DEFAULT_START_CHUNK_SIZE, @@ -217,9 +217,9 @@ where Err(_) => return Err(B::capacity_overflow()), }; - let range = match self.chunk.get().alloc_greedy::(layout) { + let range = match self.chunk.get().alloc_greedy(MinimumAlignment::, layout) { Some(ptr) => ptr, - None => self.alloc_greedy_in_another_chunk(CustomLayout(*layout))?, + None => self.alloc_greedy_in_another_chunk(*layout)?, }; Ok(range.start.cast::()..range.end.cast::()) @@ -227,16 +227,18 @@ where #[cold] #[inline(never)] - pub(crate) fn alloc_greedy_in_another_chunk( - &self, - layout: CustomLayout, - ) -> Result>, E> { - unsafe { self.do_custom_alloc_in_another_chunk(layout, RawChunk::alloc_greedy::) } + pub(crate) fn alloc_greedy_in_another_chunk(&self, layout: Layout) -> Result>, E> { + let layout = CustomLayout(layout); + unsafe { + self.do_custom_alloc_in_another_chunk(layout, |chunk, layout| { + chunk.alloc_greedy(MinimumAlignment::, layout) + }) + } } #[inline(always)] pub(crate) fn alloc_in_current_chunk(&self, layout: Layout) -> Option> { - self.chunk.get().alloc::(CustomLayout(layout)) + self.chunk.get().alloc(MinimumAlignment::, CustomLayout(layout)) } /// Allocation slow path. @@ -245,21 +247,21 @@ where #[inline(never)] pub(crate) fn alloc_in_another_chunk(&self, layout: Layout) -> Result, E> { unsafe { - self.do_custom_alloc_in_another_chunk(CustomLayout(layout), RawChunk::alloc_inner::) + self.do_custom_alloc_in_another_chunk(CustomLayout(layout), |chunk, layout| { + chunk.alloc(MinimumAlignment::, layout) + }) } } #[inline(always)] pub(crate) fn do_alloc_sized(&self) -> Result, E> { - let result = match self.chunk.get().alloc::(SizedLayout::new::()) { - Some(ptr) => Ok(ptr), - None => self.do_alloc_sized_in_another_chunk::(), - }; - - match result { - Ok(ptr) => Ok(ptr.cast()), - Err(error) => Err(error), - } + E::alloc_or_else( + self.chunk.get(), + MinimumAlignment::, + SizedLayout::new::(), + || self.do_alloc_sized_in_another_chunk::(), + ) + .map(NonNull::cast) } #[cold] @@ -279,30 +281,20 @@ where Err(_) => return Err(E::capacity_overflow()), }; - let result = match self.chunk.get().alloc::(layout) { - Some(ptr) => Ok(ptr), - None => unsafe { self.do_alloc_slice_in_another_chunk::(len) }, - }; - - match result { - Ok(ptr) => Ok(ptr.cast()), - Err(error) => Err(error), - } + E::alloc_or_else(self.chunk.get(), MinimumAlignment::, layout, || unsafe { + self.do_alloc_slice_in_another_chunk::(len) + }) + .map(NonNull::cast) } #[inline(always)] - pub(crate) fn do_alloc_slice_for(&self, value: &[T]) -> Result, E> { + pub(crate) fn do_alloc_slice_for(&self, value: &[T]) -> Result, E> { let layout = ArrayLayout::for_value(value); - let result = match self.chunk.get().alloc::(layout) { - Some(ptr) => Ok(ptr), - None => unsafe { self.do_alloc_slice_in_another_chunk::(value.len()) }, - }; - - match result { - Ok(ptr) => Ok(nonnull::slice_from_raw_parts(ptr.cast(), value.len())), - Err(error) => Err(error), - } + E::alloc_or_else(self.chunk.get(), MinimumAlignment::, layout, || unsafe { + self.do_alloc_slice_in_another_chunk::(value.len()) + }) + .map(NonNull::cast) } #[cold] diff --git a/src/chunk_raw.rs b/src/chunk_raw.rs index b411283..cf39ab1 100644 --- a/src/chunk_raw.rs +++ b/src/chunk_raw.rs @@ -1,8 +1,8 @@ use core::{alloc::Layout, cell::Cell, num::NonZeroUsize, ops::Range, ptr::NonNull}; use crate::{ - allocation_behavior::LayoutProps, down_align_usize, + layout::LayoutProps, polyfill::{const_unwrap, nonnull, pointer}, unallocated_chunk_header, up_align_nonzero, up_align_nonzero_unchecked, up_align_usize_unchecked, ChunkHeader, ChunkSize, ErrorBehavior, MinimumAlignment, SizedTypeProperties, SupportedMinimumAlignment, CHUNK_ALIGN_MIN, @@ -114,20 +114,22 @@ impl RawChunk { /// /// On success, returns a [`NonNull`] meeting the size and alignment guarantees of `layout`. #[inline(always)] - pub fn alloc(self, layout: impl LayoutProps) -> Option> + pub fn alloc(self, minimum_alignment: M, layout: L) -> Option> where - MinimumAlignment: SupportedMinimumAlignment, + M: SupportedMinimumAlignment, + L: LayoutProps, { - self.alloc_inner(layout) + self.alloc_or_else(minimum_alignment, layout, || Err(())).ok() } #[inline(always)] - pub fn alloc_inner(self, layout: L) -> Option> + pub fn alloc_or_else(self, _: M, layout: L, f: F) -> Result, E> where - MinimumAlignment: SupportedMinimumAlignment, + M: SupportedMinimumAlignment, L: LayoutProps, + F: FnOnce() -> Result, E>, { - debug_assert!(nonnull::is_aligned_to(self.pos(), MIN_ALIGN)); + debug_assert!(nonnull::is_aligned_to(self.pos(), M::MIN_ALIGN)); if L::SIZE_IS_MULTIPLE_OF_ALIGN { debug_assert!(layout.size() % layout.align() == 0); @@ -150,7 +152,7 @@ impl RawChunk { if L::ALIGN_IS_CONST && layout.align() <= CHUNK_ALIGN_MIN { // Constant, small alignment fast path! - if L::ALIGN_IS_CONST && layout.align() <= MIN_ALIGN { + if L::ALIGN_IS_CONST && layout.align() <= M::MIN_ALIGN { // alignment is already sufficient } else { // Aligning an address that is `<= range.end` with an alignment @@ -162,7 +164,7 @@ impl RawChunk { let remaining = end - start; if layout.size() > remaining { - return None; + return f(); } // doesn't exceed `end` because of the check above @@ -184,30 +186,30 @@ impl RawChunk { // due to `end` being always aligned to `CHUNK_ALIGN_MIN`, it can't be `usize::MAX`; // thus when `new_pos` is `usize::MAX` this will always return None; if new_pos > end { - return None; + return f(); } // doesn't exceed `end` because `aligned_down + align + size` didn't start = aligned_down + layout.align(); }; - if (L::ALIGN_IS_CONST && L::SIZE_IS_MULTIPLE_OF_ALIGN && layout.align() >= MIN_ALIGN) - || (L::SIZE_IS_CONST && (layout.size() % MIN_ALIGN == 0)) + if (L::ALIGN_IS_CONST && L::SIZE_IS_MULTIPLE_OF_ALIGN && layout.align() >= M::MIN_ALIGN) + || (L::SIZE_IS_CONST && (layout.size() % M::MIN_ALIGN == 0)) { // we are already aligned to `MIN_ALIGN` } else { // up aligning an address `<= range.end` with an alignment `<= CHUNK_ALIGN_MIN` (which `MIN_ALIGN` is) // can not exceed `range.end`, and thus also can't overflow - new_pos = up_align_usize_unchecked(new_pos, MIN_ALIGN); + new_pos = up_align_usize_unchecked(new_pos, M::MIN_ALIGN); } debug_assert!(is_aligned(start, layout.align())); - debug_assert!(is_aligned(start, MIN_ALIGN)); - debug_assert!(is_aligned(new_pos, MIN_ALIGN)); + debug_assert!(is_aligned(start, M::MIN_ALIGN)); + debug_assert!(is_aligned(new_pos, M::MIN_ALIGN)); unsafe { self.set_pos(self.with_addr(new_pos)); - Some(self.with_addr(start)) + Ok(self.with_addr(start)) } } else { let start = nonnull::addr(remaining.start).get(); @@ -221,41 +223,41 @@ impl RawChunk { end -= layout.size(); let needs_align_for_min_align = - (!L::ALIGN_IS_CONST || !L::SIZE_IS_MULTIPLE_OF_ALIGN || layout.align() < MIN_ALIGN) - && (!L::SIZE_IS_CONST || (layout.size() % MIN_ALIGN != 0)); + (!L::ALIGN_IS_CONST || !L::SIZE_IS_MULTIPLE_OF_ALIGN || layout.align() < M::MIN_ALIGN) + && (!L::SIZE_IS_CONST || (layout.size() % M::MIN_ALIGN != 0)); let needs_align_for_layout = - !L::ALIGN_IS_CONST || !L::SIZE_IS_MULTIPLE_OF_ALIGN || layout.align() > MIN_ALIGN; + !L::ALIGN_IS_CONST || !L::SIZE_IS_MULTIPLE_OF_ALIGN || layout.align() > M::MIN_ALIGN; if needs_align_for_min_align || needs_align_for_layout { // At this point layout's align is const, because we assume `L::SIZE_IS_CONST` implies `L::ALIGN_IS_CONST`. // That means `max` is evaluated at compile time, so we don't bother having different cases for either alignment. - end = down_align_usize(end, layout.align().max(MIN_ALIGN)); + end = down_align_usize(end, layout.align().max(M::MIN_ALIGN)); } if end < start { - return None; + return f(); } } else if L::ALIGN_IS_CONST && layout.align() <= CHUNK_ALIGN_MIN { // Constant, small alignment fast path! let remaining = end - start; if layout.size() > remaining { - return None; + return f(); } // doesn't overflow because of the check above end -= layout.size(); let needs_align_for_min_align = - (!L::ALIGN_IS_CONST || !L::SIZE_IS_MULTIPLE_OF_ALIGN || layout.align() < MIN_ALIGN) - && (!L::SIZE_IS_CONST || (layout.size() % MIN_ALIGN != 0)); + (!L::ALIGN_IS_CONST || !L::SIZE_IS_MULTIPLE_OF_ALIGN || layout.align() < M::MIN_ALIGN) + && (!L::SIZE_IS_CONST || (layout.size() % M::MIN_ALIGN != 0)); let needs_align_for_layout = - !L::ALIGN_IS_CONST || !L::SIZE_IS_MULTIPLE_OF_ALIGN || layout.align() > MIN_ALIGN; + !L::ALIGN_IS_CONST || !L::SIZE_IS_MULTIPLE_OF_ALIGN || layout.align() > M::MIN_ALIGN; if needs_align_for_min_align || needs_align_for_layout { // down aligning an address `>= range.start` with an alignment `<= CHUNK_ALIGN_MIN` (which `layout.align()` is) // can not exceed `range.start`, and thus also can't overflow - end = down_align_usize(end, layout.align().max(MIN_ALIGN)); + end = down_align_usize(end, layout.align().max(M::MIN_ALIGN)); } } else { // Alignment is `> CHUNK_ALIGN_MIN` or unknown. @@ -263,22 +265,22 @@ impl RawChunk { // this could also be a `checked_sub`, but we use `saturating_sub` to save us a branch; // the `if` below will return None if the addition saturated and returned `0` end = end.saturating_sub(layout.size()); - end = down_align_usize(end, layout.align().max(MIN_ALIGN)); + end = down_align_usize(end, layout.align().max(M::MIN_ALIGN)); // note that `end` being `0` is an invalid value for `end` and we MUST return None; // due to `start` being `NonNull`, it can't be `0`; // thus when `end` is `0` this will always return None; if end < start { - return None; + return f(); } }; debug_assert!(is_aligned(end, layout.align())); - debug_assert!(is_aligned(end, MIN_ALIGN)); + debug_assert!(is_aligned(end, M::MIN_ALIGN)); unsafe { self.set_pos(self.with_addr(end)); - Some(self.with_addr(end)) + Ok(self.with_addr(end)) } } } @@ -380,17 +382,9 @@ impl RawChunk { /// [`MutBumpVec`]: crate::MutBumpVec /// [`into_slice`]: crate::MutBumpVec::into_slice #[inline(always)] - pub fn alloc_greedy(self, layout: impl LayoutProps) -> Option>> + pub fn alloc_greedy(self, _: M, layout: L) -> Option>> where - MinimumAlignment: SupportedMinimumAlignment, - { - self.alloc_greedy_inner(layout) - } - - #[inline(always)] - pub fn alloc_greedy_inner(self, layout: L) -> Option>> - where - MinimumAlignment: SupportedMinimumAlignment, + M: SupportedMinimumAlignment, L: LayoutProps, { debug_assert_ne!(layout.size(), 0); @@ -403,7 +397,7 @@ impl RawChunk { debug_assert!(start <= end); debug_assert!(end.get() % CHUNK_ALIGN_MIN == 0); - if L::ALIGN_IS_CONST && layout.align() <= MIN_ALIGN { + if L::ALIGN_IS_CONST && layout.align() <= M::MIN_ALIGN { // alignment is already sufficient } else { // `start` needs to be aligned @@ -441,7 +435,7 @@ impl RawChunk { let mut end = end.get(); - if L::ALIGN_IS_CONST && layout.align() <= MIN_ALIGN { + if L::ALIGN_IS_CONST && layout.align() <= M::MIN_ALIGN { // alignment is already sufficient } else { end = down_align_usize(end, layout.align()); diff --git a/src/error_behavior.rs b/src/error_behavior.rs new file mode 100644 index 0000000..7347877 --- /dev/null +++ b/src/error_behavior.rs @@ -0,0 +1,101 @@ +use layout::LayoutProps; + +use crate::{ + capacity_overflow, handle_alloc_error, layout, AllocError, Infallible, Layout, NonNull, RawChunk, + SupportedMinimumAlignment, +}; + +pub(crate) trait ErrorBehavior: Sized { + fn allocation(layout: Layout) -> Self; + fn capacity_overflow() -> Self; + fn fixed_size_vector_is_full() -> Self; + fn fixed_size_vector_no_space(amount: usize) -> Self; + + /// For the infallible case we want to inline `f` but not for the fallible one. (Because it produces better code) + fn alloc_or_else( + chunk: RawChunk, + minimum_alignment: impl SupportedMinimumAlignment, + layout: impl LayoutProps, + f: impl FnOnce() -> Result, Self>, + ) -> Result, Self>; +} + +impl ErrorBehavior for Infallible { + #[inline(always)] + fn allocation(layout: Layout) -> Self { + handle_alloc_error(layout) + } + + #[inline(always)] + fn capacity_overflow() -> Self { + capacity_overflow() + } + + #[inline(always)] + fn fixed_size_vector_is_full() -> Self { + fixed_size_vector_is_full() + } + + #[inline(always)] + fn fixed_size_vector_no_space(amount: usize) -> Self { + fixed_size_vector_no_space(amount) + } + + #[inline(always)] + fn alloc_or_else( + chunk: RawChunk, + minimum_alignment: impl SupportedMinimumAlignment, + layout: impl LayoutProps, + f: impl FnOnce() -> Result, Self>, + ) -> Result, Self> { + chunk.alloc_or_else(minimum_alignment, layout, f) + } +} + +impl ErrorBehavior for AllocError { + #[inline(always)] + fn allocation(_: Layout) -> Self { + Self + } + + #[inline(always)] + fn capacity_overflow() -> Self { + Self + } + + #[inline(always)] + fn fixed_size_vector_is_full() -> Self { + Self + } + + #[inline(always)] + fn fixed_size_vector_no_space(amount: usize) -> Self { + let _ = amount; + Self + } + + #[inline(always)] + fn alloc_or_else( + chunk: RawChunk, + minimum_alignment: impl SupportedMinimumAlignment, + layout: impl LayoutProps, + f: impl FnOnce() -> Result, Self>, + ) -> Result, Self> { + match chunk.alloc(minimum_alignment, layout) { + Some(ptr) => Ok(ptr), + None => f(), + } + } +} + +#[cold] +#[inline(never)] +fn fixed_size_vector_is_full() -> ! { + panic!("fixed size vector is full"); +} + +#[cold] +#[inline(never)] +fn fixed_size_vector_no_space(amount: usize) -> ! { + panic!("fixed size vector does not have space for {amount} more elements"); +} diff --git a/src/allocation_behavior.rs b/src/layout.rs similarity index 100% rename from src/allocation_behavior.rs rename to src/layout.rs diff --git a/src/lib.rs b/src/lib.rs index 9cab73a..56312ba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -304,9 +304,10 @@ mod stats; mod with_drop; mod without_dealloc; -mod allocation_behavior; #[cfg(feature = "std")] mod bump_pool; +mod error_behavior; +mod layout; use allocator_api2::alloc::{AllocError, Allocator}; @@ -338,11 +339,12 @@ pub use stats::{Chunk, ChunkNextIter, ChunkPrevIter, GuaranteedAllocatedStats, S pub use with_drop::WithDrop; pub use without_dealloc::{WithoutDealloc, WithoutShrink}; -use allocation_behavior::{ArrayLayout, CustomLayout}; use chunk_header::{unallocated_chunk_header, ChunkHeader}; use chunk_raw::RawChunk; use chunk_size::ChunkSize; use core::alloc::Layout; +use error_behavior::ErrorBehavior; +use layout::{ArrayLayout, CustomLayout}; use polyfill::{nonnull, pointer}; use set_len_on_drop::SetLenOnDrop; use set_len_on_drop_by_ptr::SetLenOnDropByPtr; @@ -364,15 +366,17 @@ impl NoDrop for T {} impl NoDrop for [T] {} /// Specifies the current minimum alignment of a bump allocator. +#[derive(Clone, Copy)] pub struct MinimumAlignment; mod supported_minimum_alignment { use crate::ArrayLayout; - #[allow(private_interfaces)] pub trait Sealed { /// We'd be fine with just an [`core::ptr::Alignment`], but that's not stable. const LAYOUT: ArrayLayout; + + const MIN_ALIGN: usize; } } @@ -381,17 +385,18 @@ mod supported_minimum_alignment { /// This trait is *sealed*: the list of implementors below is total. Users do not have the ability to mark additional /// `MinimumAlignment` values as supported. Only bump allocators with the supported minimum alignments are constructable. #[allow(private_bounds)] -pub trait SupportedMinimumAlignment: supported_minimum_alignment::Sealed {} +pub trait SupportedMinimumAlignment: supported_minimum_alignment::Sealed + Copy {} macro_rules! supported_alignments { ($($i:literal)*) => { $( impl supported_minimum_alignment::Sealed for MinimumAlignment<$i> { - #[allow(private_interfaces)] const LAYOUT: ArrayLayout = match ArrayLayout::from_size_align(0, $i) { Ok(layout) => layout, Err(_) => unreachable!(), }; + + const MIN_ALIGN: usize = $i; } impl SupportedMinimumAlignment for MinimumAlignment<$i> {} )* @@ -580,47 +585,6 @@ unsafe impl<'a, A: Allocator> Allocator for WithLifetime<'a, A> { } } -trait ErrorBehavior: Sized { - fn allocation(layout: Layout) -> Self; - fn capacity_overflow() -> Self; - fn fixed_size_vector_is_full() -> Self; - fn fixed_size_vector_no_space(amount: usize) -> Self; -} - -impl ErrorBehavior for Infallible { - #[inline(always)] - fn allocation(layout: Layout) -> Self { - handle_alloc_error(layout) - } - - #[inline(always)] - fn capacity_overflow() -> Self { - capacity_overflow() - } - - #[inline(always)] - fn fixed_size_vector_is_full() -> Self { - fixed_size_vector_is_full() - } - - #[inline(always)] - fn fixed_size_vector_no_space(amount: usize) -> Self { - fixed_size_vector_no_space(amount) - } -} - -#[cold] -#[inline(never)] -fn fixed_size_vector_is_full() -> ! { - panic!("fixed size vector is full"); -} - -#[cold] -#[inline(never)] -fn fixed_size_vector_no_space(amount: usize) -> ! { - panic!("fixed size vector does not have space for {amount} more elements"); -} - #[cold] #[inline(never)] #[cfg(not(feature = "alloc"))] @@ -628,29 +592,6 @@ fn handle_alloc_error(_layout: Layout) -> ! { panic!("allocation failed") } -impl ErrorBehavior for AllocError { - #[inline(always)] - fn allocation(_: Layout) -> Self { - Self - } - - #[inline(always)] - fn capacity_overflow() -> Self { - Self - } - - #[inline(always)] - fn fixed_size_vector_is_full() -> Self { - Self - } - - #[inline(always)] - fn fixed_size_vector_no_space(amount: usize) -> Self { - let _ = amount; - Self - } -} - // this is just `Result::into_ok` but with a name to match our use case #[inline(always)] #[cfg(not(no_global_oom_handling))] @@ -1367,8 +1308,8 @@ define_alloc_methods! { let dst = self.do_alloc_slice_for(slice)?; unsafe { - core::ptr::copy_nonoverlapping(src, dst.cast::().as_ptr(), len); - Ok(BumpBox::from_raw(dst)) + core::ptr::copy_nonoverlapping(src, dst.as_ptr(), len); + Ok(BumpBox::from_raw(nonnull::slice_from_raw_parts(dst, len))) } } @@ -1785,7 +1726,7 @@ define_alloc_methods! { for pub fn alloc_layout for pub fn try_alloc_layout fn generic_alloc_layout(&self, layout: Layout) -> NonNull | NonNull { - match self.chunk.get().alloc::(CustomLayout(layout)) { + match self.chunk.get().alloc(MinimumAlignment::, CustomLayout(layout)) { Some(ptr) => Ok(ptr), None => self.alloc_in_another_chunk(layout), } diff --git a/src/with_drop.rs b/src/with_drop.rs index 2842aa3..04392bc 100644 --- a/src/with_drop.rs +++ b/src/with_drop.rs @@ -44,7 +44,7 @@ use std::ops::Deref; use allocator_api2::alloc::Allocator; use crate::{ - allocation_behavior::LayoutProps, + layout::LayoutProps, polyfill::{layout, nonnull}, AllocError, AnyBump, BumpAllocator, BumpBox, ErrorBehavior, SizedTypeProperties, };