From 731e609d6f3e6210831ef66b9021551fdba83680 Mon Sep 17 00:00:00 2001 From: Matthew Devereau Date: Tue, 7 Oct 2025 10:15:04 +0000 Subject: [PATCH 1/2] Use Lanes(d) masks for target SVE2_128 In dynamic dispatching, Highway pushes function attributes such as "+sve" onto functions to compile them for specific targets. Clang and GCC rely on the "-msve-vector-bits=128" option to specialize functions for 128-bit SVE vectors, but this flag applies specialized attributes at compilation-unit granularity, and therefore cannot be pushed to individual functions. The absence of this flag means useful information about the vector length of SVE2_128 functions can be lost to compiler intermediate representations such as LLVM IR. Using Lanes(d) for SVE2_128 masked intrinsics gives compilers the ability to size the memory ranges affected by intrinsics. This aids the compiler in eliminating redundant loads and stores in dynamic dispatching by enabling alias-analysis optimizations. --- hwy/ops/arm_sve-inl.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hwy/ops/arm_sve-inl.h b/hwy/ops/arm_sve-inl.h index 8acc66d416..c3325755a0 100644 --- a/hwy/ops/arm_sve-inl.h +++ b/hwy/ops/arm_sve-inl.h @@ -1,4 +1,5 @@ // Copyright 2021 Google LLC +// Copyright 2025 Arm Limited and/or its affiliates // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -391,6 +392,9 @@ HWY_API svbool_t PFalse() { return svpfalse_b(); } // arithmetic) can ignore d and use PTrue instead. template svbool_t MakeMask(D d) { +#if (HWY_TARGET == HWY_SVE2_128) + return FirstN(d, Lanes(d)); +#endif return IsFull(d) ? 
PTrue(d) : FirstN(d, Lanes(d)); } From 3ff4a94a8c643a026a168fb9ff50b47d3d65ab3e Mon Sep 17 00:00:00 2001 From: Matthew Devereau Date: Wed, 8 Oct 2025 11:13:45 +0000 Subject: [PATCH 2/2] Add comment to explain changes --- hwy/ops/arm_sve-inl.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hwy/ops/arm_sve-inl.h b/hwy/ops/arm_sve-inl.h index c3325755a0..b5a8cd1acd 100644 --- a/hwy/ops/arm_sve-inl.h +++ b/hwy/ops/arm_sve-inl.h @@ -390,6 +390,12 @@ HWY_API svbool_t PFalse() { return svpfalse_b(); } // // This is used in functions that load/store memory; other functions (e.g. // arithmetic) can ignore d and use PTrue instead. +// +// Always use FirstN(N) for HWY_TARGET == HWY_SVE2_128 to avoid vector length +// information loss when using PTrue(d) predicates in memory intrinsics. +// +// SVE2_256 is untested due to unavailable hardware and cannot assume +// equal minimum and maximum vector lengths as SVE2_128 can. template svbool_t MakeMask(D d) { #if (HWY_TARGET == HWY_SVE2_128)