Skip to content

Conversation

@cuiweixie
Copy link
Contributor

No description provided.

Copy link
Member

@TimWolla TimWolla left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To me this decreases readability in something that is not in the hot path at all. I also expect the compiler to be able to do this optimization by itself.

@cuiweixie
Copy link
Contributor Author

cuiweixie commented Jan 30, 2026

To me this decreases readability in something that is not in the hot path at all. I also expect the compiler to be able to do this optimization by itself.

status_len and ping_len are easy to connect with strlen(status) and strlen(ping). Usually compiler will not do the opt.

@cuiweixie cuiweixie requested a review from TimWolla January 30, 2026 12:50
@TimWolla TimWolla removed their request for review January 30, 2026 13:15
@iluuu1994
Copy link
Member

Usually compiler will not do the opt.

This is called loop-invariant code motion and is indeed a very common optimization. Such changes can be reasonable but only when demonstrating the generated assembly actually improves on a reasonably modern compiler.

@cuiweixie
Copy link
Contributor Author

cuiweixie commented Jan 30, 2026

@iluuu1994
test_strlen_opt.c

#include <string.h>
#include <ctype.h>

/*
 * Demonstration function: walks `status` and stops at the first character
 * for which isalnum() reports false. It returns void, so its only effects
 * are the library calls it makes.
 */
void test_func(char *status) {
    size_t i;
    /* strlen(status) is written inside the loop condition, so at the source
     * level it is evaluated before every iteration. */
    for (i = 0; i < strlen(status); i++) {
        /* NOTE(review): status[i] is a plain char; isalnum() requires a value
         * representable as unsigned char (or EOF), so production code should
         * cast to unsigned char first. */
        if (!isalnum(status[i])) {
            return;
        }
    }
}

clang -O3 -S -o test_strlen_opt.s test_strlen_opt.c
	.build_version macos, 15, 0	sdk_version 15, 2
	.section	__TEXT,__text,regular,pure_instructions
	.globl	_test_func                      ; -- Begin function test_func
	.p2align	2
; test_func(char *status) as emitted by `clang -O3 -S` (arm64, Apple syntax).
; Entry: x0 = status (first argument register under AAPCS64).
; Register roles once the frame is set up:
;   x19 = status (copied from x0 so it survives the calls)
;   x20 = loop index i, starts at 0
;   x21 = address of __DefaultRuneLocale, loaded through the GOT
;   w22 = 0x500, the mask ANDed with the rune-table entry
;         (presumably the alpha|digit class bits behind isalnum — confirm
;         against the platform <_ctype.h>)
_test_func:                             ; @test_func
	.cfi_startproc
; %bb.0:
; Early exit: a NUL first byte means the string is empty, so return
; without building a stack frame.
	ldrb	w8, [x0]
	cbz	w8, LBB0_7
; %bb.1:
; Prologue: 48-byte frame holding callee-saved x19-x22 plus x29/x30.
	stp	x22, x21, [sp, #-48]!           ; 16-byte Folded Spill
	stp	x20, x19, [sp, #16]             ; 16-byte Folded Spill
	stp	x29, x30, [sp, #32]             ; 16-byte Folded Spill
	add	x29, sp, #32
	.cfi_def_cfa w29, 16
	.cfi_offset w30, -8
	.cfi_offset w29, -16
	.cfi_offset w19, -24
	.cfi_offset w20, -32
	.cfi_offset w21, -40
	.cfi_offset w22, -48
	mov	x19, x0
	mov	x20, #0                         ; =0x0
Lloh0:
	adrp	x21, __DefaultRuneLocale@GOTPAGE
Lloh1:
	ldr	x21, [x21, __DefaultRuneLocale@GOTPAGEOFF]
	mov	w22, #1280                      ; =0x500
; Loop head: load status[i] sign-extended. If bit 31 is set (the byte was
; >= 0x80) take the out-of-line ___maskrune path at LBB0_4.
LBB0_2:                                 ; =>This Inner Loop Header: Depth=1
	ldrsb	w0, [x19, x20]
	tbnz	w0, #31, LBB0_4
; %bb.3:                                ;   in Loop: Header=BB0_2 Depth=1
; Non-negative byte: read the 32-bit entry at __DefaultRuneLocale + 60 + 4*c
; and mask it. Non-zero continues the loop, zero leaves it.
	add	x8, x21, w0, uxtw #2
	ldr	w8, [x8, #60]
	and	w0, w8, w22
	cbnz	w0, LBB0_5
	b	LBB0_6
; Negative byte: call ___maskrune(c, 0x500); a zero result leaves the loop.
LBB0_4:                                 ;   in Loop: Header=BB0_2 Depth=1
	mov	w1, #1280                       ; =0x500
	bl	___maskrune
	cbz	w0, LBB0_6
; Loop latch: i++, then strlen(status) is called again and compared with i.
; The strlen call sits inside the loop in this output; it was not hoisted.
LBB0_5:                                 ;   in Loop: Header=BB0_2 Depth=1
	add	x20, x20, #1
	mov	x0, x19
	bl	_strlen
	cmp	x20, x0
	b.lo	LBB0_2
; Epilogue: restore the saved registers and release the 48-byte frame.
LBB0_6:
	ldp	x29, x30, [sp, #32]             ; 16-byte Folded Reload
	ldp	x20, x19, [sp, #16]             ; 16-byte Folded Reload
	ldp	x22, x21, [sp], #48             ; 16-byte Folded Reload
; Shared return point, also reached directly by the empty-string early exit.
LBB0_7:
	ret
	.loh AdrpLdrGot	Lloh0, Lloh1
	.cfi_endproc
                                        ; -- End function
.subsections_via_symbols

I think strlen is still called inside the loop: see the bl _strlen in block LBB0_5, which branches back to LBB0_2.

@cuiweixie
Copy link
Contributor Author

Here is a variant of this example with a main function, disassembled with llvm-objdump:

#include <string.h>
#include <ctype.h>
#include <stdio.h>

/*
 * Returns 1 when every character of `status` passes isalnum() (including
 * the empty string, where the loop body never runs), and 0 as soon as one
 * character fails the check.
 */
int test_func(char *status) {
    size_t i;
    /* strlen(status) is written inside the loop condition, so at the source
     * level it is evaluated before every iteration. */
    for (i = 0; i < strlen(status); i++) {
        /* NOTE(review): status[i] is a plain char; isalnum() requires a value
         * representable as unsigned char (or EOF), so production code should
         * cast to unsigned char first. */
        if (!isalnum(status[i])) {
            return 0;
        }
    }
    return 1;
}

/*
 * Driver: runs test_func() on argv[0] and prints the result as a decimal
 * integer followed by a newline. There is no explicit return statement;
 * control falls off the end of main.
 */
int main(int argc, char **argv) {
	int r = test_func(argv[0]);
	printf("%d\n", r);
}

clang test_strlen_opt.c
llvm-objdump -D ./a.out > dis.s

./a.out:	file format mach-o arm64

Disassembly of section __TEXT,__text:

0000000100003eac <_test_func>:
100003eac: d100c3ff    	sub	sp, sp, #0x30
100003eb0: a9027bfd    	stp	x29, x30, [sp, #0x20]
100003eb4: 910083fd    	add	x29, sp, #0x20
100003eb8: f9000be0    	str	x0, [sp, #0x10]
100003ebc: f90007ff    	str	xzr, [sp, #0x8]
100003ec0: 14000001    	b	0x100003ec4 <_test_func+0x18>
100003ec4: f94007e8    	ldr	x8, [sp, #0x8]
100003ec8: f90003e8    	str	x8, [sp]
100003ecc: f9400be0    	ldr	x0, [sp, #0x10]
100003ed0: 94000032    	bl	0x100003f98 <_strlen+0x100003f98>
100003ed4: f94003e8    	ldr	x8, [sp]
100003ed8: eb000108    	subs	x8, x8, x0
100003edc: 540001e2    	b.hs	0x100003f18 <_test_func+0x6c>
100003ee0: 14000001    	b	0x100003ee4 <_test_func+0x38>
100003ee4: f9400be8    	ldr	x8, [sp, #0x10]
100003ee8: f94007e9    	ldr	x9, [sp, #0x8]
100003eec: 38e96900    	ldrsb	w0, [x8, x9]
100003ef0: 94000024    	bl	0x100003f80 <_strlen+0x100003f80>
100003ef4: 35000080    	cbnz	w0, 0x100003f04 <_test_func+0x58>
100003ef8: 14000001    	b	0x100003efc <_test_func+0x50>
100003efc: b81fc3bf    	stur	wzr, [x29, #-0x4]
100003f00: 14000009    	b	0x100003f24 <_test_func+0x78>
100003f04: 14000001    	b	0x100003f08 <_test_func+0x5c>
100003f08: f94007e8    	ldr	x8, [sp, #0x8]
100003f0c: 91000508    	add	x8, x8, #0x1
100003f10: f90007e8    	str	x8, [sp, #0x8]
100003f14: 17ffffec    	b	0x100003ec4 <_test_func+0x18>
100003f18: 52800028    	mov	w8, #0x1                ; =1
100003f1c: b81fc3a8    	stur	w8, [x29, #-0x4]
100003f20: 14000001    	b	0x100003f24 <_test_func+0x78>
100003f24: b85fc3a0    	ldur	w0, [x29, #-0x4]
100003f28: a9427bfd    	ldp	x29, x30, [sp, #0x20]
100003f2c: 9100c3ff    	add	sp, sp, #0x30
100003f30: d65f03c0    	ret

0000000100003f34 <_main>:
100003f34: d100c3ff    	sub	sp, sp, #0x30
100003f38: a9027bfd    	stp	x29, x30, [sp, #0x20]
100003f3c: 910083fd    	add	x29, sp, #0x20
100003f40: b81fc3a0    	stur	w0, [x29, #-0x4]
100003f44: f9000be1    	str	x1, [sp, #0x10]
100003f48: f9400be8    	ldr	x8, [sp, #0x10]
100003f4c: f9400100    	ldr	x0, [x8]
100003f50: 97ffffd7    	bl	0x100003eac <_test_func>
100003f54: b9000fe0    	str	w0, [sp, #0xc]
100003f58: b9400fe8    	ldr	w8, [sp, #0xc]
100003f5c: 910003e9    	mov	x9, sp
100003f60: f9000128    	str	x8, [x9]
100003f64: 90000000    	adrp	x0, 0x100003000 <_strlen+0x100003000>
100003f68: 913e9000    	add	x0, x0, #0xfa4
100003f6c: 94000008    	bl	0x100003f8c <_strlen+0x100003f8c>
100003f70: 52800000    	mov	w0, #0x0                ; =0
100003f74: a9427bfd    	ldp	x29, x30, [sp, #0x20]
100003f78: 9100c3ff    	add	sp, sp, #0x30
100003f7c: d65f03c0    	ret

Disassembly of section __TEXT,__stubs:

0000000100003f80 <__stubs>:
100003f80: b0000010    	adrp	x16, 0x100004000 <_strlen+0x100004000>
100003f84: f9400210    	ldr	x16, [x16]
100003f88: d61f0200    	br	x16
100003f8c: b0000010    	adrp	x16, 0x100004000 <_strlen+0x100004000>
100003f90: f9400610    	ldr	x16, [x16, #0x8]
100003f94: d61f0200    	br	x16
100003f98: b0000010    	adrp	x16, 0x100004000 <_strlen+0x100004000>
100003f9c: f9400a10    	ldr	x16, [x16, #0x10]
100003fa0: d61f0200    	br	x16

Disassembly of section __TEXT,__cstring:

0000000100003fa4 <__cstring>:
100003fa4: 000a6425    	<unknown>

Disassembly of section __TEXT,__unwind_info:

0000000100003fa8 <__unwind_info>:
100003fa8: 00000001    	udf	#0x1
100003fac: 0000001c    	udf	#0x1c
100003fb0: 00000000    	udf	#0x0
100003fb4: 0000001c    	udf	#0x1c
100003fb8: 00000000    	udf	#0x0
100003fbc: 0000001c    	udf	#0x1c
100003fc0: 00000002    	udf	#0x2
100003fc4: 00003eac    	udf	#0x3eac
100003fc8: 00000040    	udf	#0x40
100003fcc: 00000040    	udf	#0x40
100003fd0: 00003f80    	udf	#0x3f80
100003fd4: 00000000    	udf	#0x0
100003fd8: 00000040    	udf	#0x40
		...
100003fe8: 00000003    	udf	#0x3
100003fec: 0001000c    	<unknown>
100003ff0: 00010010    	<unknown>
100003ff4: 00000000    	udf	#0x0
100003ff8: 04000000    	add	z0.b, p0/m, z0.b, z0.b
100003ffc: 00000000    	udf	#0x0

Disassembly of section __DATA_CONST,__got:

0000000100004000 <__got>:
100004000: 00000000    	udf	#0x0
100004004: 80100000    	fmop4a	za0.s, z0.s, { z16.s, z17.s }
100004008: 00000001    	udf	#0x1
10000400c: 80100000    	fmop4a	za0.s, z0.s, { z16.s, z17.s }
100004010: 00000002    	udf	#0x2
100004014: 80000000    	fmop4a	za0.s, z0.s, z16.s

            ┌───────────────────────┐
            │ 100003eac: Entry      │
            │ Setup stack frame     │
            │ i = 0 (sp+0x8)        │
            └───────────┬───────────┘
                        │
                        ▼
            ┌───────────────────────┐
    ┌──────►│ 100003ec4: Loop Start │
    │       │ ldr x8, [sp, #0x8]    │
    │       │ (Load loop var i)     │
    │       └───────────┬───────────┘
    │                   │
    │                   ▼
    │       ┌───────────────────────┐
    │       │ 100003ed0:            │
    │       │ ╔═══════════════════╗ │
    │       │ ║ bl _strlen        ║ │  ⭐ strlen in loop!
    │       │ ║ (Called each iter)║ │
    │       │ ╚═══════════════════╝ │
    │       └───────────┬───────────┘
    │                   │
    │                   ▼
    │       ┌───────────────────────┐
    │       │ 100003ed8:            │                   ┌────────────────────────┐
    │       │ subs x8, x8, x0       │   (i >= strlen)   │ 100003f18: Loop End    │
    │       │ (Compare i - strlen)  ├──────────────────►│ mov w8, #1             │
    │       │ b.hs 0x100003f18      │                   │ (function returns 1)   │
    │       └───────────┬───────────┘                   └────────────────────────┘
    │                   │ (i < strlen)
    │                   ▼
    │       ┌───────────────────────┐
    │       │ 100003ee4: Loop Body  │
    │       │ Load char             │
    │       │ ldrsb w0, ...         │
    │       └───────────┬───────────┘
    │                   │
    │                   ▼
    │       ┌───────────────────────┐                   ┌────────────────────────┐
    │       │ 100003ef0:            │    (not alnum)    │ 100003efc:             │
    │       │ bl isalnum            ├──────────────────►│ stur wzr, [x29, #-0x4] │
    │       │ (Check char)          │                   │ (function returns 0)   │
    │       └───────────┬───────────┘                   └────────────────────────┘
    │                   │ (is alnum)
    │                   ▼
    │       ┌───────────────────────┐
    │       │ 100003f08:            │
    │       │ add x8, x8, #1        │ ← i++
    │       │ str x8, [sp, #0x8]    │
    └───────┤ b 0x100003ec4         │ ← Jump back to loop start
            └───────────────────────┘

@TimWolla
Copy link
Member

For the first snippet, the assembly doesn't make sense; that function can simply be optimized into a plain return.

For the second snippet you're compiling without optimizations. With -O2 the strlen() call only happens once: https://godbolt.org/z/Mv977P9YK

In any case: This is not a hot code path and readability is preferable.

@cuiweixie cuiweixie closed this Jan 30, 2026
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants