From 32b877f690a3b7068a89c904880fb94cf954dcb2 Mon Sep 17 00:00:00 2001 From: Zhaoshi Zheng Date: Tue, 20 Jan 2026 17:16:58 -0800 Subject: [PATCH 1/3] Add flag -mmark-bti-property for memcpy.S and memset.S Signed-off-by: Zhaoshi Zheng --- Makefile | 4 ++++ configure | 6 ++++++ .../linux/arm/aarch64-nofp-pacret-b-key-bti_baremetal.sh | 2 +- .../config/linux/arm/aarch64-nofp-pacret-bti_baremetal.sh | 2 +- .../config/linux/arm/aarch64-pacret-b-key-bti_baremetal.sh | 4 ++-- .../config/linux/arm/aarch64-pacret-bti_baremetal.sh | 4 ++-- .../config/linux/arm/aarch64-pacret-bti_linux.sh | 2 +- 7 files changed, 17 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index ddba8b78..056b62c4 100644 --- a/Makefile +++ b/Makefile @@ -114,6 +114,10 @@ ifeq ($(QUIC_AARCH64_NOFP),yes) ALL_OBJS := $(subst obj/src/math/aarch64/,obj/src/math/, $(ALL_OBJS)) endif +ifeq ($(QUIC_AARCH64_MARK_BTI),yes) +ASM_SRCS = src/string/aarch64/memcpy.S src/string/aarch64/memset.S +$(ASM_SRCS:%.S=obj/%.o) $(ASM_SRCS:%.S=obj/%.lo): CFLAGS_ALL += -mmark-bti-property +endif include build_variants.mk # Qualcomm-specific code - end diff --git a/configure b/configure index 45a044cd..0caec6ce 100755 --- a/configure +++ b/configure @@ -41,6 +41,7 @@ Qualcomm-specific options: --quic-arm-armv6m-armv5 build for ARMV6M or ARMV5 architectures [disabled] --quic-arm-nofp use optimized no floating point memset [disabled] --quic-aarch64-nofp build for aarch64 with no floating point [disabled] + --quic-aarch64-mark-bti mark bti property for assembly source files [disabled] --quic-libc-standalone build standalone libc [disabled] --quic-baremetal-uselock build for baremetal with locking/unlocking [disabled] @@ -191,6 +192,7 @@ case "$arg" in --quic-arm-armv6m-armv5|--quic-arm-armv6m-armv5=yes) quic_arm_armv6m_armv5=yes ;; --quic-arm-nofp|--quic-arm-nofp=yes) quic_arm_nofp=yes ;; --quic-aarch64-nofp|--quic-aarch64-nofp=yes) quic_aarch64_nofp=yes ;; +--quic-aarch64-mark-bti|--quic-aarch64-mark-bti=yes) 
quic_aarch64_mark_bti=yes ;; --quic-libc-standalone|--quic-libc-standalone=yes) quic_libc_standalone=yes ;; --quic-baremetal-uselock|--quic-baremetal-uselock=yes) quic_baremetal_uselock=yes ;; --enable-*|--disable-*|--with-*|--without-*|--*dir=*) ;; @@ -707,6 +709,9 @@ fi if test "x$quic_aarch64_nofp" = xyes ; then QUIC_AARCH64_NOFP=yes fi +if test "x$quic_aarch64_mark_bti" = xyes ; then +QUIC_AARCH64_MARK_BTI=yes +fi if test "x$quic_baremetal_uselock" = xyes ; then CFLAGS_AUTO="$CFLAGS_AUTO -D__QUIC_BAREMETAL_USELOCK" CFLAGS_AUTO="${CFLAGS_AUTO# }" @@ -849,6 +854,7 @@ QUIC_ARM_BAREMETAL = $QUIC_ARM_BAREMETAL QUIC_ARM_ARMV6M_ARMV5 = $QUIC_ARM_ARMV6M_ARMV5 QUIC_ARM_NOFP = $QUIC_ARM_NOFP QUIC_AARCH64_NOFP = $QUIC_AARCH64_NOFP +QUIC_AARCH64_MARK_BTI = $QUIC_AARCH64_MARK_BTI QUIC_BAREMETAL_USELOCK = $QUIC_BAREMETAL_USELOCK QUIC_LIBC_STANDALONE = $QUIC_LIBC_STANDALONE QUIC_VARIANTS = $QUIC_VARIANTS diff --git a/qualcomm-software/config/linux/arm/aarch64-nofp-pacret-b-key-bti_baremetal.sh b/qualcomm-software/config/linux/arm/aarch64-nofp-pacret-b-key-bti_baremetal.sh index 90d0b438..5ef65936 100644 --- a/qualcomm-software/config/linux/arm/aarch64-nofp-pacret-b-key-bti_baremetal.sh +++ b/qualcomm-software/config/linux/arm/aarch64-nofp-pacret-b-key-bti_baremetal.sh @@ -8,4 +8,4 @@ RESOURCE_DIR=$(clang -print-resource-dir) PATH_TO_BUILTIN=$RESOURCE_DIR/lib/baremetal BUILTIN=clang_rt.builtins-pacret-bti-aarch64-nofp -./configure --disable-wrapper --quic-arm-baremetal --quic-aarch64-nofp --disable-visibility CROSS_COMPILE="llvm-" CC="clang --target=aarch64-none-elf -fuse-ld=eld" CFLAGS="-mabi=aapcs-soft -march=armv8.5-a -mbranch-protection=pac-ret+leaf+b-key+bti -Os -mstrict-align -fPIC -fdata-sections -ffunction-sections -fvisibility=hidden -DVISIBILITY_HIDDEN -mgeneral-regs-only -mllvm -aarch64-enable-simd-scalar=false -fno-rounding-math" LDFLAGS="-Wl,-L${PATH_TO_BUILTIN}" LIBCC="-l${BUILTIN}" "$@" +./configure --disable-wrapper --quic-arm-baremetal --quic-aarch64-nofp 
--quic-aarch64-mark-bti --disable-visibility CROSS_COMPILE="llvm-" CC="clang --target=aarch64-none-elf -fuse-ld=eld" CFLAGS="-mabi=aapcs-soft -march=armv8.5-a -mbranch-protection=pac-ret+leaf+b-key+bti -Os -mstrict-align -fPIC -fdata-sections -ffunction-sections -fvisibility=hidden -DVISIBILITY_HIDDEN -mgeneral-regs-only -mllvm -aarch64-enable-simd-scalar=false -fno-rounding-math" LDFLAGS="-Wl,-L${PATH_TO_BUILTIN}" LIBCC="-l${BUILTIN}" "$@" diff --git a/qualcomm-software/config/linux/arm/aarch64-nofp-pacret-bti_baremetal.sh b/qualcomm-software/config/linux/arm/aarch64-nofp-pacret-bti_baremetal.sh index 984c8484..919f6143 100644 --- a/qualcomm-software/config/linux/arm/aarch64-nofp-pacret-bti_baremetal.sh +++ b/qualcomm-software/config/linux/arm/aarch64-nofp-pacret-bti_baremetal.sh @@ -8,4 +8,4 @@ RESOURCE_DIR=$(clang -print-resource-dir) PATH_TO_BUILTIN=$RESOURCE_DIR/lib/baremetal BUILTIN=clang_rt.builtins-pacret-bti-aarch64-nofp -./configure --disable-wrapper --quic-arm-baremetal --quic-aarch64-nofp --disable-visibility CROSS_COMPILE="llvm-" CC="clang --target=aarch64-none-elf -fuse-ld=eld" CFLAGS="-mabi=aapcs-soft -march=armv8.3a -mbranch-protection=pac-ret+leaf+bti -Os -mstrict-align -fPIC -fdata-sections -ffunction-sections -fvisibility=hidden -DVISIBILITY_HIDDEN -mgeneral-regs-only -mllvm -aarch64-enable-simd-scalar=false -fno-rounding-math" LDFLAGS="-Wl,-L${PATH_TO_BUILTIN}" LIBCC="-l${BUILTIN}" "$@" +./configure --disable-wrapper --quic-arm-baremetal --quic-aarch64-nofp --quic-aarch64-mark-bti --disable-visibility CROSS_COMPILE="llvm-" CC="clang --target=aarch64-none-elf -fuse-ld=eld" CFLAGS="-mabi=aapcs-soft -march=armv8.3a -mbranch-protection=pac-ret+leaf+bti -Os -mstrict-align -fPIC -fdata-sections -ffunction-sections -fvisibility=hidden -DVISIBILITY_HIDDEN -mgeneral-regs-only -mllvm -aarch64-enable-simd-scalar=false -fno-rounding-math" LDFLAGS="-Wl,-L${PATH_TO_BUILTIN}" LIBCC="-l${BUILTIN}" "$@" diff --git 
a/qualcomm-software/config/linux/arm/aarch64-pacret-b-key-bti_baremetal.sh b/qualcomm-software/config/linux/arm/aarch64-pacret-b-key-bti_baremetal.sh index d07bbaaa..ba389244 100644 --- a/qualcomm-software/config/linux/arm/aarch64-pacret-b-key-bti_baremetal.sh +++ b/qualcomm-software/config/linux/arm/aarch64-pacret-b-key-bti_baremetal.sh @@ -13,11 +13,11 @@ echo $INSTALL_DIR mkdir -p $INSTALL_DIR/lib # configure for libc standalone -./configure --disable-wrapper --quic-arm-baremetal --quic-libc-standalone --disable-visibility CROSS_COMPILE="llvm-" CC="clang --target=aarch64-none-elf -fuse-ld=eld" CFLAGS="-march=armv8.5-a -mbranch-protection=pac-ret+leaf+b-key+bti -Os -mstrict-align -fPIC -fdata-sections -ffunction-sections -fvisibility=hidden -DVISIBILITY_HIDDEN -D__QUIC_ENABLE_FLT_FOR_PRINT -fno-rounding-math" "$@" --prefix=tmp_install_dir/libc +./configure --disable-wrapper --quic-arm-baremetal --quic-aarch64-mark-bti --quic-libc-standalone --disable-visibility CROSS_COMPILE="llvm-" CC="clang --target=aarch64-none-elf -fuse-ld=eld" CFLAGS="-march=armv8.5-a -mbranch-protection=pac-ret+leaf+b-key+bti -Os -mstrict-align -fPIC -fdata-sections -ffunction-sections -fvisibility=hidden -DVISIBILITY_HIDDEN -D__QUIC_ENABLE_FLT_FOR_PRINT -fno-rounding-math" "$@" --prefix=tmp_install_dir/libc make -j8 && make install cp tmp_install_dir/libc/lib/libc.a $INSTALL_DIR/lib/libc-standalone.a rm -rf tmp_install_dir make distclean -./configure --disable-wrapper --quic-arm-baremetal --disable-visibility CROSS_COMPILE="llvm-" CC="clang --target=aarch64-none-elf -fuse-ld=eld" CFLAGS="-march=armv8.5-a -mbranch-protection=pac-ret+leaf+b-key+bti -Os -mstrict-align -fPIC -fdata-sections -ffunction-sections -fvisibility=hidden -DVISIBILITY_HIDDEN -fno-rounding-math" "$@" +./configure --disable-wrapper --quic-arm-baremetal --quic-aarch64-mark-bti --disable-visibility CROSS_COMPILE="llvm-" CC="clang --target=aarch64-none-elf -fuse-ld=eld" CFLAGS="-march=armv8.5-a 
-mbranch-protection=pac-ret+leaf+b-key+bti -Os -mstrict-align -fPIC -fdata-sections -ffunction-sections -fvisibility=hidden -DVISIBILITY_HIDDEN -fno-rounding-math" "$@" diff --git a/qualcomm-software/config/linux/arm/aarch64-pacret-bti_baremetal.sh b/qualcomm-software/config/linux/arm/aarch64-pacret-bti_baremetal.sh index 01acfad6..960c6cc1 100644 --- a/qualcomm-software/config/linux/arm/aarch64-pacret-bti_baremetal.sh +++ b/qualcomm-software/config/linux/arm/aarch64-pacret-bti_baremetal.sh @@ -13,11 +13,11 @@ echo $INSTALL_DIR mkdir -p $INSTALL_DIR/lib # configure for libc standalone -./configure --disable-wrapper --quic-arm-baremetal --quic-libc-standalone --disable-visibility CROSS_COMPILE="llvm-" CC="clang --target=aarch64-none-elf -fuse-ld=eld" CFLAGS="-march=armv8.3a -mbranch-protection=pac-ret+leaf+bti -Os -mstrict-align -fPIC -fdata-sections -ffunction-sections -fvisibility=hidden -DVISIBILITY_HIDDEN -D__QUIC_ENABLE_FLT_FOR_PRINT -fno-rounding-math" "$@" --prefix=tmp_install_dir/libc +./configure --disable-wrapper --quic-arm-baremetal --quic-aarch64-mark-bti --quic-libc-standalone --disable-visibility CROSS_COMPILE="llvm-" CC="clang --target=aarch64-none-elf -fuse-ld=eld" CFLAGS="-march=armv8.3a -mbranch-protection=pac-ret+leaf+bti -Os -mstrict-align -fPIC -fdata-sections -ffunction-sections -fvisibility=hidden -DVISIBILITY_HIDDEN -D__QUIC_ENABLE_FLT_FOR_PRINT -fno-rounding-math" "$@" --prefix=tmp_install_dir/libc make -j8 && make install cp tmp_install_dir/libc/lib/libc.a $INSTALL_DIR/lib/libc-standalone.a rm -rf tmp_install_dir make distclean -./configure --disable-wrapper --quic-arm-baremetal --disable-visibility CROSS_COMPILE="llvm-" CC="clang --target=aarch64-none-elf -fuse-ld=eld" CFLAGS="-march=armv8.3a -mbranch-protection=pac-ret+leaf+bti -Os -mstrict-align -fPIC -fdata-sections -ffunction-sections -fvisibility=hidden -DVISIBILITY_HIDDEN -fno-rounding-math" "$@" +./configure --disable-wrapper --quic-arm-baremetal --quic-aarch64-mark-bti 
--disable-visibility CROSS_COMPILE="llvm-" CC="clang --target=aarch64-none-elf -fuse-ld=eld" CFLAGS="-march=armv8.3a -mbranch-protection=pac-ret+leaf+bti -Os -mstrict-align -fPIC -fdata-sections -ffunction-sections -fvisibility=hidden -DVISIBILITY_HIDDEN -fno-rounding-math" "$@" diff --git a/qualcomm-software/config/linux/arm/aarch64-pacret-bti_linux.sh b/qualcomm-software/config/linux/arm/aarch64-pacret-bti_linux.sh index a791b94b..9afdece1 100644 --- a/qualcomm-software/config/linux/arm/aarch64-pacret-bti_linux.sh +++ b/qualcomm-software/config/linux/arm/aarch64-pacret-bti_linux.sh @@ -1,3 +1,3 @@ # should be run from MUSL source top directory # ./configure --disable-wrapper CROSS_COMPILE="llvm-" CC="clang --target=aarch64-linux-gnu -fuse-ld=eld" CFLAGS="-march=armv8.3a -mbranch-protection=pac-ret+leaf+bti -O3 -mstrict-align -fno-vectorize-loops -fPIC -fno-rounding-math" LIBCC="$(clang -print-resource-dir)/lib/generic/libclang_rt.builtins-pacret-bti-aarch64.a" "$@" -./configure --disable-wrapper CROSS_COMPILE="llvm-" CC="clang --target=aarch64-linux-gnu -fuse-ld=eld" CFLAGS="-march=armv8.3a -mbranch-protection=pac-ret+leaf+bti -O3 -mstrict-align -fPIC -fno-rounding-math" LIBCC="$(clang -print-resource-dir)/lib/linux/libclang_rt.builtins-pacret-bti-aarch64.a" "$@" +./configure --disable-wrapper --quic-aarch64-mark-bti CROSS_COMPILE="llvm-" CC="clang --target=aarch64-linux-gnu -fuse-ld=eld" CFLAGS="-march=armv8.3a -mbranch-protection=pac-ret+leaf+bti -O3 -mstrict-align -fPIC -fno-rounding-math" LIBCC="$(clang -print-resource-dir)/lib/linux/libclang_rt.builtins-pacret-bti-aarch64.a" "$@" From f41532bac37c89d0464f4d7a5624c33a26c16fbb Mon Sep 17 00:00:00 2001 From: Pengxuan Zheng Date: Tue, 20 Jan 2026 18:17:50 -0800 Subject: [PATCH 2/3] [aarch64] Add bti landing pad for memset/memcpy This patch adds bti landing pad for aarch64's assembly implementation of memset/memcpy when --quic-aarch64-mark-bti is passed during configure. 
Signed-off-by: Pengxuan Zheng --- configure | 2 ++ src/string/aarch64/memcpy.S | 3 +++ src/string/aarch64/memset.S | 3 +++ 3 files changed, 8 insertions(+) diff --git a/configure b/configure index 0caec6ce..2d2fdd73 100755 --- a/configure +++ b/configure @@ -710,6 +710,8 @@ if test "x$quic_aarch64_nofp" = xyes ; then QUIC_AARCH64_NOFP=yes fi if test "x$quic_aarch64_mark_bti" = xyes ; then +CFLAGS_AUTO="$CFLAGS_AUTO -D__QUIC_BTI" +CFLAGS_AUTO="${CFLAGS_AUTO# }" QUIC_AARCH64_MARK_BTI=yes fi if test "x$quic_baremetal_uselock" = xyes ; then diff --git a/src/string/aarch64/memcpy.S b/src/string/aarch64/memcpy.S index 584fcf9c..3370aed0 100644 --- a/src/string/aarch64/memcpy.S +++ b/src/string/aarch64/memcpy.S @@ -58,6 +58,9 @@ .type memcpy,%function memcpy: .cfi_startproc +#ifdef __QUIC_BTI + bti c +#endif add srcend, src, count add dstend, dstin, count cmp count, 128 diff --git a/src/string/aarch64/memset.S b/src/string/aarch64/memset.S index 0aa0f614..13d3d02f 100644 --- a/src/string/aarch64/memset.S +++ b/src/string/aarch64/memset.S @@ -51,6 +51,9 @@ .type memset,%function memset: .cfi_startproc +#ifdef __QUIC_BTI + bti c +#endif mov dst, dstin /* Preserve return value. */ ands A_lw, val, #255 b.eq .Lzero_mem /* Use DC ZVA instruction if the val = 0 */ From 87e47b2d3798d704ac782d7ae67e48c648bd9b1a Mon Sep 17 00:00:00 2001 From: Pengxuan Zheng Date: Wed, 21 Jan 2026 09:46:30 -0800 Subject: [PATCH 3/3] [aarch64] Add configure flag to control the use of optimized memset/memcpy This patch adds a configure flag "--quic-aarch64-optmem" to optionally enable aarch64's assembly implementation of memset/memcpy. With this patch, the generic implementation is used by default and the optimized implementation is only used if the flag "--quic-aarch64-optmem" is passed explicitly when configuring. More specifically, with this patch, all aarch64 baremetal variants will use the generic version of memset/memcpy instead of the optimized one. 
We only enable the optimized memset/memcpy for the aarch64 linux variant for now. Signed-off-by: Pengxuan Zheng --- Makefile | 5 +++++ configure | 6 ++++++ qualcomm-software/config/linux/arm/aarch64_linux.sh | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 056b62c4..0d1e878c 100644 --- a/Makefile +++ b/Makefile @@ -118,6 +118,11 @@ ifeq ($(QUIC_AARCH64_MARK_BTI),yes) ASM_SRCS = src/string/aarch64/memcpy.S src/string/aarch64/memset.S $(ASM_SRCS:%.S=obj/%.o) $(ASM_SRCS:%.S=obj/%.lo): CFLAGS_ALL += -mmark-bti-property endif + +ifneq ($(QUIC_AARCH64_OPTMEM),yes) +# Use the generic implementation of memset/memcpy instead. +ALL_OBJS := $(subst obj/src/string/aarch64/,obj/src/string/, $(ALL_OBJS)) +endif include build_variants.mk # Qualcomm-specific code - end diff --git a/configure b/configure index 2d2fdd73..886b3de8 100755 --- a/configure +++ b/configure @@ -42,6 +42,7 @@ Qualcomm-specific options: --quic-arm-nofp use optimized no floating point memset [disabled] --quic-aarch64-nofp build for aarch64 with no floating point [disabled] --quic-aarch64-mark-bti mark bti property for assembly source files [disabled] + --quic-aarch64-optmem use optimized memset/memcpy for aarch64 [disabled] --quic-libc-standalone build standalone libc [disabled] --quic-baremetal-uselock build for baremetal with locking/unlocking [disabled] @@ -193,6 +194,7 @@ case "$arg" in --quic-arm-nofp|--quic-arm-nofp=yes) quic_arm_nofp=yes ;; --quic-aarch64-nofp|--quic-aarch64-nofp=yes) quic_aarch64_nofp=yes ;; --quic-aarch64-mark-bti|--quic-aarch64-mark-bti=yes) quic_aarch64_mark_bti=yes ;; +--quic-aarch64-optmem|--quic-aarch64-optmem=yes) quic_aarch64_optmem=yes ;; --quic-libc-standalone|--quic-libc-standalone=yes) quic_libc_standalone=yes ;; --quic-baremetal-uselock|--quic-baremetal-uselock=yes) quic_baremetal_uselock=yes ;; --enable-*|--disable-*|--with-*|--without-*|--*dir=*) ;; @@ -714,6 +716,9 @@ CFLAGS_AUTO="$CFLAGS_AUTO -D__QUIC_BTI" 
CFLAGS_AUTO="${CFLAGS_AUTO# }" QUIC_AARCH64_MARK_BTI=yes fi +if test "x$quic_aarch64_optmem" = xyes ; then +QUIC_AARCH64_OPTMEM=yes +fi if test "x$quic_baremetal_uselock" = xyes ; then CFLAGS_AUTO="$CFLAGS_AUTO -D__QUIC_BAREMETAL_USELOCK" CFLAGS_AUTO="${CFLAGS_AUTO# }" @@ -857,6 +862,7 @@ QUIC_ARM_ARMV6M_ARMV5 = $QUIC_ARM_ARMV6M_ARMV5 QUIC_ARM_NOFP = $QUIC_ARM_NOFP QUIC_AARCH64_NOFP = $QUIC_AARCH64_NOFP QUIC_AARCH64_MARK_BTI = $QUIC_AARCH64_MARK_BTI +QUIC_AARCH64_OPTMEM = $QUIC_AARCH64_OPTMEM QUIC_BAREMETAL_USELOCK = $QUIC_BAREMETAL_USELOCK QUIC_LIBC_STANDALONE = $QUIC_LIBC_STANDALONE QUIC_VARIANTS = $QUIC_VARIANTS diff --git a/qualcomm-software/config/linux/arm/aarch64_linux.sh b/qualcomm-software/config/linux/arm/aarch64_linux.sh index 5d188f8a..af3a0602 100644 --- a/qualcomm-software/config/linux/arm/aarch64_linux.sh +++ b/qualcomm-software/config/linux/arm/aarch64_linux.sh @@ -1,3 +1,3 @@ # should be run from MUSL source top directory # ./configure --disable-wrapper CROSS_COMPILE="llvm-" CC="clang --target=aarch64-linux-gnu -fuse-ld=eld" CFLAGS="-O3 -mstrict-align -fno-vectorize-loops -fPIC -fno-rounding-math" LIBCC="$(clang -print-resource-dir)/lib/generic/libclang_rt.builtins-aarch64.a" "$@" -./configure --disable-wrapper CROSS_COMPILE="llvm-" CC="clang --target=aarch64-linux-gnu -fuse-ld=eld" CFLAGS="-O3 -mstrict-align -fPIC -fno-rounding-math" LIBCC="$(clang -print-resource-dir)/lib/linux/libclang_rt.builtins-aarch64.a" "$@" +./configure --disable-wrapper --quic-aarch64-optmem CROSS_COMPILE="llvm-" CC="clang --target=aarch64-linux-gnu -fuse-ld=eld" CFLAGS="-O3 -mstrict-align -fPIC -fno-rounding-math" LIBCC="$(clang -print-resource-dir)/lib/linux/libclang_rt.builtins-aarch64.a" "$@"