Skip to content

Commit 4cd575c

Browse files
authored
Merge pull request #5423 from martin-frbg/issue5414
Split VORTEXM4 from VORTEX target and fix SGEMM_DIRECT support for SME-capable targets
2 parents e07bea1 + 6f225da commit 4cd575c

32 files changed

+445
-149
lines changed

Makefile.arm64

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,11 @@ endif
6161
ifeq ($(CORE), ARMV9SME)
6262
CCOMMON_OPT += -march=armv9-a+sve2+sme
6363
FCOMMON_OPT += -march=armv9-a+sve2
64+
ifdef OS_WINDOWS
65+
ifeq ($(C_COMPILER), CLANG)
66+
CCOMMON_OPT += --aarch64-stack-hazard-size=0
67+
endif
68+
endif
6469
endif
6570

6671
ifeq ($(CORE), CORTEXA53)
@@ -303,6 +308,20 @@ FCOMMON_OPT += -march=armv8.3-a
303308
endif
304309
endif
305310

311+
ifeq ($(CORE), VORTEXM4)
312+
ifneq ($(C_COMPILER), GCC)
313+
ifeq ($(APPLECLANG),1)
314+
CCOMMON_OPT += -march=armv8.4-a+sme
315+
else
316+
CCOMMON_OPT += -march=armv8.4-a+sme
317+
override LDFLAGS += -lclang_rt_builtins-aarch64
318+
endif
319+
else
320+
CCOMMON_OPT += -march=armv8.4-a
321+
endif
322+
FCOMMON_OPT += -march=armv8.4-a
323+
endif
324+
306325
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
307326
ifeq ($(CORE), TSV110)
308327
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110

Makefile.system

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -331,6 +331,7 @@ HAVE_SSE5=
331331
HAVE_AVX=
332332
HAVE_AVX2=
333333
HAVE_FMA3=
334+
HAVE_SME=
334335
include $(TOPDIR)/Makefile_kernel.conf
335336
endif
336337

@@ -427,7 +428,7 @@ ifndef MACOSX_DEPLOYMENT_TARGET
427428
ifeq ($(ARCH), arm64)
428429
export MACOSX_DEPLOYMENT_TARGET=11.0
429430
export NO_SVE = 1
430-
export NO_SME = 1
431+
# export NO_SME = 1
431432
else
432433
export MACOSX_DEPLOYMENT_TARGET=10.8
433434
endif
@@ -721,6 +722,11 @@ DYNAMIC_CORE += A64FX
721722
endif
722723
ifneq ($(NO_SME), 1)
723724
DYNAMIC_CORE += ARMV9SME
725+
ifeq ($(OSNAME), Darwin)
726+
ifneq ($(C_COMPILER), GCC)
727+
DYNAMIC_CORE += VORTEXM4
728+
endif
729+
endif
724730
endif
725731
DYNAMIC_CORE += THUNDERX
726732
DYNAMIC_CORE += THUNDERX2T99
@@ -1896,6 +1902,7 @@ ifndef NO_MSA
18961902
export HAVE_MSA
18971903
export MSA_FLAGS
18981904
endif
1905+
export HAVE_SME
18991906
export KERNELDIR
19001907
export FUNCTION_PROFILE
19011908
export TARGET_CORE

TargetList.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -111,6 +111,7 @@ THUNDERX2T99
111111
TSV110
112112
THUNDERX3T110
113113
VORTEX
114+
VORTEXM4
114115
A64FX
115116
ARMV8SVE
116117
ARMV9SME

c_check

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -335,7 +335,14 @@ if [ "$architecture" = "arm64" ]; then
335335
fi
336336

337337
no_sme=0
338+
is_appleclang=0
338339
if [ "$architecture" = "arm64" ]; then
340+
if [ "$compiler" = "CLANG" ]; then
341+
data=`$compiler_name --version`
342+
case "$data" in Apple*)
343+
is_appleclang=1
344+
esac
345+
fi
339346
tmpd=$(mktemp -d 2>/dev/null || mktemp -d -t 'OBC')
340347
tmpf="$tmpd/a.S"
341348
printf ".text \n.global sme_test\n\nsme_test:\nsmstart\nsmstop\nret\n">> "$tmpf"
@@ -469,6 +476,7 @@ done
469476
[ "$no_avx512bf" -eq 1 ] && printf "NO_AVX512BF16=1\n"
470477
[ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n"
471478
[ "$oldgcc" -eq 1 ] && printf "OLDGCC=1\n"
479+
[ "$is_appleclang" -eq 1 ] && printf "APPLECLANG=1\n"
472480
exit 0
473481
}
474482

@@ -499,6 +507,7 @@ done
499507
[ "$no_avx512bf" -eq 1 ] && printf "NO_AVX512BF16=1\n"
500508
[ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n"
501509
[ "$oldgcc" -eq 1 ] && printf "OLDGCC=1\n"
510+
[ "$is_appleclang" -eq 1 ] && printf "APPLECLANG=1\n"
502511
[ "$no_lsx" -eq 1 ] && printf "NO_LSX=1\n"
503512
[ "$no_lasx" -eq 1 ] && printf "NO_LASX=1\n"
504513
} >> "$makefile"

cmake/arch.cmake

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,13 +40,16 @@ if (DYNAMIC_ARCH)
4040
endif ()
4141
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 14) # SME ACLE supported in GCC >= 14
4242
set(DYNAMIC_CORE ${DYNAMIC_CORE} ARMV9SME)
43+
endif()
44+
if (${CMAKE_C_COMPILER_ID} MATCHES "Clang" AND ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
45+
set(DYNAMIC_CORE ${DYNAMIC_CORE} VORTEXM4)
4346
endif()
4447
elseif (${CMAKE_C_COMPILER_ID} MATCHES "Clang")
4548
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 11) # SVE ACLE supported in LLVM >= 11
4649
set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE A64FX)
4750
endif ()
48-
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 19) # SME ACLE supported in LLVM >= 19
49-
set(DYNAMIC_CORE ${DYNAMIC_CORE} ARMV9SME)
51+
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 19 OR (${CMAKE_C_COMPILER_ID} MATCHES AppleClang AND ${CMAKE_C_COMPILER_VERSION} VERSION_GREATER_EQUAL 17) ) # SME ACLE supported in LLVM >= 19 and AppleClang >= 17
52+
set(DYNAMIC_CORE ${DYNAMIC_CORE} ARMV9SME VORTEXM4)
5053
endif()
5154
endif ()
5255
if (DYNAMIC_LIST)

cmake/cc.cmake

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -315,7 +315,24 @@ if (${CORE} STREQUAL ARMV9SME)
315315
set (CCOMMON_OPT "${CCOMMON_OPT} -tp=host")
316316
else ()
317317
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv9-a+sme")
318+
if (${OSNAME} STREQUAL Windows AND ${CMAKE_C_COMPILER_ID} MATCHES "Clang" )
319+
set (CCOMMON_OPT "${CCOMMON_OPT} --aarch64-stack-hazard-size=0")
318320
endif ()
321+
endif ()
322+
endif ()
323+
endif ()
324+
325+
if (${CORE} STREQUAL VORTEXM4)
326+
if (NOT DYNAMIC_ARCH)
327+
if (${CMAKE_C_COMPILER_ID} STREQUAL "NVC" AND NOT NO_SVE)
328+
set (CCOMMON_OPT "${CCOMMON_OPT} -tp=host")
329+
else ()
330+
if (${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang")
331+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sme -mcpu=apple-m4")
332+
else ()
333+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a -mcpu=apple-m4")
334+
endif ()
335+
endif ()
319336
endif ()
320337
endif ()
321338

cmake/prebuild.cmake

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1255,7 +1255,7 @@ endif ()
12551255
set(ZGEMM_UNROLL_M 4)
12561256
set(ZGEMM_UNROLL_N 4)
12571257
set(SYMV_P 16)
1258-
elseif ("${TCORE}" STREQUAL "VORTEX")
1258+
elseif ("${TCORE}" STREQUAL "VORTEX" OR "${TCORE}" STREQUAL "VORTEXM4")
12591259
file(APPEND ${TARGET_CONF_TEMP}
12601260
"#define ARMV8\n"
12611261
"#define L1_CODE_SIZE\t32768\n"
@@ -1639,6 +1639,8 @@ else(NOT CMAKE_CROSSCOMPILING)
16391639
unset (HAVE_VFP)
16401640
unset (HAVE_VFPV3)
16411641
unset (HAVE_VFPV4)
1642+
unset (HAVE_SVE)
1643+
unset (HAVE_SME)
16421644
message(STATUS "Running getarch")
16431645

16441646
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way

cmake/system.cmake

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -367,6 +367,15 @@ if (${TARGET} STREQUAL NEOVERSEV1)
367367
endif()
368368
if (${TARGET} STREQUAL ARMV9SME)
369369
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv9-a+sme -O3")
370+
if (${CMAKE_SYSTEM_NAME} STREQUAL Windows AND ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
371+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} --aarch64-stack-hazard-size=0")
372+
endif()
373+
endif()
374+
if (${TARGET} STREQUAL VORTEXM4)
375+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sme -O3")
376+
if (${CMAKE_SYSTEM_NAME} STREQUAL Windows AND ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
377+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} --aarch64-stack-hazard-size=0")
378+
endif()
370379
endif()
371380
if (${TARGET} STREQUAL A64FX)
372381
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)

common_param.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -257,6 +257,7 @@ int (*shgemv_t) (BLASLONG, BLASLONG, float, hfloat16 *, BLASLONG, hfloat16 *, BL
257257
#endif
258258
#ifdef ARCH_ARM64
259259
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
260+
int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K);
260261
void (*sgemm_direct_alpha_beta) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float, float * , BLASLONG);
261262
void (*ssymm_direct_alpha_betaLU) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float, float * , BLASLONG);
262263
void (*ssymm_direct_alpha_betaLL) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float, float * , BLASLONG);

common_s.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -231,7 +231,7 @@
231231
#define SGEMM_DIRECT_PERFORMANT gotoblas -> sgemm_direct_performant
232232
#define SGEMM_DIRECT gotoblas -> sgemm_direct
233233
#elif ARCH_ARM64
234-
#define SGEMM_DIRECT_PERFORMANT sgemm_direct_performant
234+
#define SGEMM_DIRECT_PERFORMANT gotoblas -> sgemm_direct_performant
235235
#define SGEMM_DIRECT gotoblas -> sgemm_direct
236236
#define SGEMM_DIRECT_ALPHA_BETA gotoblas -> sgemm_direct_alpha_beta
237237
#define SSYMM_DIRECT_ALPHA_BETA_LU gotoblas -> ssymm_direct_alpha_betaLU

0 commit comments

Comments
 (0)