From 48b443ba08f3c3ee4b724fde0c776b51bf9b6d5d Mon Sep 17 00:00:00 2001 From: Milad Fa Date: Thu, 23 Oct 2025 14:18:19 +0000 Subject: [PATCH 1/5] Fix --- hwy/contrib/thread_pool/thread_pool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hwy/contrib/thread_pool/thread_pool.h b/hwy/contrib/thread_pool/thread_pool.h index 7f647820b8..1f19ab4a8f 100644 --- a/hwy/contrib/thread_pool/thread_pool.h +++ b/hwy/contrib/thread_pool/thread_pool.h @@ -1605,7 +1605,7 @@ class alignas(HWY_ALIGNMENT) ThreadPool { // Returns whether threads were used. If not, there is no need to update // the autotuner config. template <class Closure> - bool RunWithoutAutotune(uint64_t begin, uint64_t end, pool::Caller caller, + bool RunWithoutAutotune(uint64_t begin, uint64_t end, HWY_MAYBE_UNUSED pool::Caller caller, const Closure& closure) { pool::Worker& main = workers_[0]; From cfdeec5da527d1d82b5deaba6ed0cd892a384401 Mon Sep 17 00:00:00 2001 From: George Burgess IV Date: Fri, 24 Oct 2025 07:35:45 -0700 Subject: [PATCH 2/5] Use C headers for `stdarg` and `stdio` PiperOrigin-RevId: 823523857 --- hwy/base.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hwy/base.h b/hwy/base.h index a02fe7096f..51ba7383f3 100644 --- a/hwy/base.h +++ b/hwy/base.h @@ -22,8 +22,8 @@ #include <stddef.h> #include <stdint.h> #if defined(HWY_HEADER_ONLY) -#include <cstdarg> -#include <cstdio> +#include <stdarg.h> +#include <stdio.h> #endif #if !defined(HWY_NO_LIBCXX) From 743d0731a94fa6fbd13aed5651771baf3e25d46d Mon Sep 17 00:00:00 2001 From: John Platts Date: Fri, 24 Oct 2025 10:12:55 -0500 Subject: [PATCH 3/5] Re-enabled BF16 on x86 in debug builds with Clang 22 or later --- hwy/base.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hwy/base.h b/hwy/base.h index a02fe7096f..a908b52e0d 100644 --- a/hwy/base.h +++ b/hwy/base.h @@ -1744,9 +1744,9 @@ HWY_F16_CONSTEXPR inline std::partial_ordering operator<=>( // bf16 <-> f32 in convert_test results in 0x2525 for 1.0 instead of 0x3f80.
// Reported at https://github.com/llvm/llvm-project/issues/151692. #ifndef HWY_SSE2_HAVE_SCALAR_BF16_TYPE -#if HWY_ARCH_X86 && defined(__SSE2__) && \ - ((HWY_COMPILER_CLANG >= 1700 && !HWY_COMPILER_CLANGCL && \ - !HWY_IS_DEBUG_BUILD) || \ +#if HWY_ARCH_X86 && defined(__SSE2__) && \ + ((HWY_COMPILER_CLANG >= 1700 && !HWY_COMPILER_CLANGCL && \ + (!HWY_IS_DEBUG_BUILD || HWY_COMPILER_CLANG >= 2200)) || \ HWY_COMPILER_GCC_ACTUAL >= 1300) #define HWY_SSE2_HAVE_SCALAR_BF16_TYPE 1 #else From b853b761c2eaf58df3e68c6648ebb50aa34134a0 Mon Sep 17 00:00:00 2001 From: John Platts Date: Fri, 24 Oct 2025 11:09:02 -0500 Subject: [PATCH 4/5] Adjusted hwy/base.h to enable BF16 on x86 with Clang 22.1.1 or later --- hwy/base.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hwy/base.h b/hwy/base.h index a908b52e0d..3339e21e92 100644 --- a/hwy/base.h +++ b/hwy/base.h @@ -1744,9 +1744,9 @@ HWY_F16_CONSTEXPR inline std::partial_ordering operator<=>( // bf16 <-> f32 in convert_test results in 0x2525 for 1.0 instead of 0x3f80. // Reported at https://github.com/llvm/llvm-project/issues/151692. 
#ifndef HWY_SSE2_HAVE_SCALAR_BF16_TYPE -#if HWY_ARCH_X86 && defined(__SSE2__) && \ - ((HWY_COMPILER_CLANG >= 1700 && !HWY_COMPILER_CLANGCL && \ - (!HWY_IS_DEBUG_BUILD || HWY_COMPILER_CLANG >= 2200)) || \ +#if HWY_ARCH_X86 && defined(__SSE2__) && \ + ((HWY_COMPILER_CLANG >= 1700 && !HWY_COMPILER_CLANGCL && \ + (!HWY_IS_DEBUG_BUILD || HWY_COMPILER3_CLANG >= 220101)) || \ HWY_COMPILER_GCC_ACTUAL >= 1300) #define HWY_SSE2_HAVE_SCALAR_BF16_TYPE 1 #else From 989a498fdf3e2b758c48998e87a2807a1a53e494 Mon Sep 17 00:00:00 2001 From: Jan Wassenberg Date: Fri, 24 Oct 2025 09:31:04 -0700 Subject: [PATCH 5/5] GCC 15 removed avx10.2-512 target PiperOrigin-RevId: 823560321 --- hwy/ops/set_macros-inl.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/hwy/ops/set_macros-inl.h b/hwy/ops/set_macros-inl.h index 1dfb197ab9..2a5e1dc1ca 100644 --- a/hwy/ops/set_macros-inl.h +++ b/hwy/ops/set_macros-inl.h @@ -187,12 +187,13 @@ #define HWY_TARGET_STR_AVX3_SPR HWY_TARGET_STR_AVX3_ZEN4 #endif -#if HWY_COMPILER_GCC_ACTUAL >= 1500 || HWY_COMPILER_CLANG >= 2200 -#if HWY_HAVE_EVEX512 +// Support for avx10.2-512 was removed between clang 22 and 23 without a +// feature test macro. +#if HWY_COMPILER_CLANG >= 2200 && HWY_HAVE_EVEX512 #define HWY_TARGET_STR_AVX10_2 HWY_TARGET_STR_AVX3_SPR ",avx10.2-512" -#else +// Recent compilers drop the -512 suffix because 512 bits are always available. +#elif HWY_COMPILER_GCC_ACTUAL >= 1500 || HWY_COMPILER_CLANG >= 2200 #define HWY_TARGET_STR_AVX10_2 HWY_TARGET_STR_AVX3_SPR ",avx10.2" -#endif // HWY_HAVE_EVEX512 #else #define HWY_TARGET_STR_AVX10_2 HWY_TARGET_STR_AVX3_SPR #endif