From 5304ab54606e1277afefee86a39a79bd0bd471f7 Mon Sep 17 00:00:00 2001 From: Salvatore Dipietro Date: Thu, 1 May 2025 16:19:08 -0700 Subject: [PATCH 1/3] Added sb instruction support for ARMv9 architecture --- Makefile.in | 1 + include/jemalloc/internal/spin.h | 5 +++- include/jemalloc/internal/spin_delay_arm.h | 21 +++++++++++++++++ src/spin_delay_arm.c | 27 ++++++++++++++++++++++ 4 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 include/jemalloc/internal/spin_delay_arm.h create mode 100644 src/spin_delay_arm.c diff --git a/Makefile.in b/Makefile.in index ac8c51ff..bb0f5ad5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -152,6 +152,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/safety_check.c \ $(srcroot)src/sc.c \ $(srcroot)src/sec.c \ + $(srcroot)src/spin_delay_arm.c \ $(srcroot)src/stats.c \ $(srcroot)src/sz.c \ $(srcroot)src/tcache.c \ diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h index 87c400d5..127edfc3 100644 --- a/include/jemalloc/internal/spin.h +++ b/include/jemalloc/internal/spin.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_SPIN_H #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/spin_delay_arm.h" #define SPIN_INITIALIZER {0U} @@ -11,7 +12,9 @@ typedef struct { static inline void spin_cpu_spinwait(void) { -# if HAVE_CPU_SPINWAIT +# if defined(__linux__) && (defined(__aarch64__) || defined(__arm64__)) + spin_delay_arm(); +# elif HAVE_CPU_SPINWAIT CPU_SPINWAIT; # else volatile int x = 0; diff --git a/include/jemalloc/internal/spin_delay_arm.h b/include/jemalloc/internal/spin_delay_arm.h new file mode 100644 index 00000000..3ea86c6c --- /dev/null +++ b/include/jemalloc/internal/spin_delay_arm.h @@ -0,0 +1,21 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include + +/* Global variable to track SB support, declared as extern to be defined in one TU */ +extern _Atomic int arm_has_sb_instruction; + +/* Constructor function declaration - implementation in 
spin_delay_arm.c */ +__attribute__((constructor)) +void detect_arm_sb_support(void); + +/* Use SB instruction if available, otherwise ISB */ +static inline void +spin_delay_arm(void) { + if (__builtin_expect(arm_has_sb_instruction == 1, 1)) { + /* SB instruction encoding */ + asm volatile(".inst 0xd50330ff \n"); + } else { + /* ISB instruction */ + asm volatile("isb; \n"); + } +} diff --git a/src/spin_delay_arm.c b/src/spin_delay_arm.c new file mode 100644 index 00000000..975023b0 --- /dev/null +++ b/src/spin_delay_arm.c @@ -0,0 +1,27 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/spin_delay_arm.h" +#include + +#if defined(__linux__) && (defined(__aarch64__) || defined(__arm64__)) +#include + +/* Define HWCAP_SB if not already defined in system headers */ +#ifndef HWCAP_SB +#define HWCAP_SB (1ULL << 56) /* Speculation Barrier */ +#endif // HWCAP_SB +#endif // __linux__ && (defined(__aarch64__) || defined(__arm64__)) + +/* Global variable to track SB support, defined here to avoid multiple definitions */ +_Atomic int arm_has_sb_instruction = ATOMIC_VAR_INIT(0); + +/* Constructor function to detect hardware capabilities at program startup */ +__attribute__((constructor)) +void +detect_arm_sb_support(void) { +#if defined(__linux__) && (defined(__aarch64__) || defined(__arm64__)) + /* Check if SB instruction is supported */ + if (getauxval(AT_HWCAP) & HWCAP_SB) { + atomic_store_explicit(&arm_has_sb_instruction, 1, memory_order_release); + } +#endif +} \ No newline at end of file From d1bb7c6ba701da97ce2f5ce6a50350d421f18a9d Mon Sep 17 00:00:00 2001 From: Salvatore Dipietro Date: Fri, 9 May 2025 17:05:38 -0700 Subject: [PATCH 2/3] rely on HWCAP_SB definition --- include/jemalloc/internal/spin_delay_arm.h | 2 ++ src/spin_delay_arm.c | 5 ----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/spin_delay_arm.h b/include/jemalloc/internal/spin_delay_arm.h index 3ea86c6c..8d5628ed 100644 ---
a/include/jemalloc/internal/spin_delay_arm.h +++ b/include/jemalloc/internal/spin_delay_arm.h @@ -11,6 +11,7 @@ void detect_arm_sb_support(void); /* Use SB instruction if available, otherwise ISB */ static inline void spin_delay_arm(void) { +#ifdef HWCAP_SB if (__builtin_expect(arm_has_sb_instruction == 1, 1)) { /* SB instruction encoding */ asm volatile(".inst 0xd50330ff \n"); @@ -18,4 +19,5 @@ spin_delay_arm(void) { /* ISB instruction */ asm volatile("isb; \n"); } +#endif // HWCAP_SB } diff --git a/src/spin_delay_arm.c b/src/spin_delay_arm.c index 975023b0..2aeb37d1 100644 --- a/src/spin_delay_arm.c +++ b/src/spin_delay_arm.c @@ -4,11 +4,6 @@ #if defined(__linux__) && (defined(__aarch64__) || defined(__arm64__)) #include - -/* Define HWCAP_SB if not already defined in system headers */ -#ifndef HWCAP_SB -#define HWCAP_SB (1ULL << 56) /* Speculation Barrier */ -#endif // HWCAP_SB #endif // __linux__ && (defined(__aarch64__) || defined(__arm64__)) /* Global variable to track SB support, defined here to avoid multiple definitions */ From 3ebca8b44fb99193204f54abeefa7f34aebfbb5a Mon Sep 17 00:00:00 2001 From: Salvatore Dipietro Date: Mon, 12 May 2025 14:26:14 -0700 Subject: [PATCH 3/3] Removed atomics and added guards for compilers --- include/jemalloc/internal/spin.h | 3 ++- include/jemalloc/internal/spin_delay_arm.h | 18 ++++------------- src/spin_delay_arm.c | 23 ++++++++-------------- 3 files changed, 14 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h index 127edfc3..35c3c8fe 100644 --- a/include/jemalloc/internal/spin.h +++ b/include/jemalloc/internal/spin.h @@ -12,7 +12,8 @@ typedef struct { static inline void spin_cpu_spinwait(void) { -# if defined(__linux__) && (defined(__aarch64__) || defined(__arm64__)) +# if defined(__linux__) && (defined(__aarch64__) || defined(__arm64__)) && \ + (defined(__GNUC__) || defined(__clang__)) spin_delay_arm(); # elif HAVE_CPU_SPINWAIT CPU_SPINWAIT; diff --git 
a/include/jemalloc/internal/spin_delay_arm.h b/include/jemalloc/internal/spin_delay_arm.h index 8d5628ed..1a711afe 100644 --- a/include/jemalloc/internal/spin_delay_arm.h +++ b/include/jemalloc/internal/spin_delay_arm.h @@ -1,23 +1,13 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#include -/* Global variable to track SB support, declared as extern to be defined in one TU */ -extern _Atomic int arm_has_sb_instruction; - -/* Constructor function declaration - implementation in spin_delay_arm.c */ -__attribute__((constructor)) -void detect_arm_sb_support(void); +/* Global variable to track SB support */ +extern int arm_has_sb_instruction; /* Use SB instruction if available, otherwise ISB */ -static inline void -spin_delay_arm(void) { -#ifdef HWCAP_SB +static inline void spin_delay_arm(void) { if (__builtin_expect(arm_has_sb_instruction == 1, 1)) { - /* SB instruction encoding */ - asm volatile(".inst 0xd50330ff \n"); + asm volatile(".inst 0xd50330ff \n"); /* SB instruction encoding */ } else { - /* ISB instruction */ asm volatile("isb; \n"); } -#endif // HWCAP_SB } diff --git a/src/spin_delay_arm.c b/src/spin_delay_arm.c index 2aeb37d1..ce9fdbe3 100644 --- a/src/spin_delay_arm.c +++ b/src/spin_delay_arm.c @@ -1,22 +1,15 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/spin_delay_arm.h" -#include -#if defined(__linux__) && (defined(__aarch64__) || defined(__arm64__)) +/* Initialize to 0 (false) by default */ +int arm_has_sb_instruction = 0; + +#if defined(__linux__) && (defined(__aarch64__) || defined(__arm64__)) && \ + (defined(__GNUC__) || defined(__clang__)) #include -#endif // __linux__ && (defined(__aarch64__) || defined(__arm64__)) -/* Global variable to track SB support, defined here to avoid multiple definitions */ -_Atomic int arm_has_sb_instruction = ATOMIC_VAR_INIT(0); - -/* Constructor function to detect hardware capabilities at program startup */ __attribute__((constructor)) -void -detect_arm_sb_support(void) { -#if 
defined(__linux__) && (defined(__aarch64__) || defined(__arm64__)) - /* Check if SB instruction is supported */ - if (getauxval(AT_HWCAP) & HWCAP_SB) { - atomic_store_explicit(&arm_has_sb_instruction, 1, memory_order_release); - } +void detect_arm_sb_support(void) { + arm_has_sb_instruction = (getauxval(AT_HWCAP) & HWCAP_SB) ? 1 : 0; +} #endif -} \ No newline at end of file