这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions hwy/contrib/algo/transform_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,8 @@ struct TestGenerate {
// TODO(janwas): can we update the apply_to in HWY_PUSH_ATTRIBUTES so that
// the attribute also applies to lambdas? If so, remove HWY_ATTR.
#if HWY_GENERIC_LAMBDA
const auto gen2 = [](const auto d, const auto vidx)
HWY_ATTR { return BitCast(d, Add(vidx, vidx)); };
const auto gen2 = [](const auto d2, const auto vidx)
HWY_ATTR { return BitCast(d2, Add(vidx, vidx)); };
#else
const Gen2 gen2;
#endif
Expand Down Expand Up @@ -245,8 +245,8 @@ struct TestTransform {
// TODO(janwas): can we update the apply_to in HWY_PUSH_ATTRIBUTES so that
// the attribute also applies to lambdas? If so, remove HWY_ATTR.
#if HWY_GENERIC_LAMBDA
const auto scal = [](const auto d, const auto v) HWY_ATTR {
return Mul(Set(d, ConvertScalarTo<T>(kAlpha)), v);
const auto scal = [](const auto d2, const auto v) HWY_ATTR {
return Mul(Set(d2, ConvertScalarTo<T>(kAlpha)), v);
};
#else
const SCAL scal;
Expand Down Expand Up @@ -290,8 +290,8 @@ struct TestTransform1 {
SimpleAXPY(a, b, expected.get(), count);

#if HWY_GENERIC_LAMBDA
const auto axpy = [](const auto d, const auto v, const auto v1) HWY_ATTR {
return MulAdd(Set(d, ConvertScalarTo<T>(kAlpha)), v, v1);
const auto axpy = [](const auto d2, const auto v, const auto v1) HWY_ATTR {
return MulAdd(Set(d2, ConvertScalarTo<T>(kAlpha)), v, v1);
};
#else
const AXPY axpy;
Expand Down
8 changes: 4 additions & 4 deletions hwy/contrib/matvec/matvec-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@ HWY_NOINLINE void MatVecAddImpl(const T* HWY_RESTRICT mat,
// Process multiple rows at a time so that we write multiples of a cache line
// to avoid false sharing (>= 64). 128 is better than 256. 512 has too little
// parallelization potential.
constexpr size_t kChunkSize = 64 / sizeof(T);
const uint64_t num_chunks = static_cast<uint64_t>(kOuter / kChunkSize);
constexpr size_t kChunkSize2 = 64 / sizeof(T);
const uint64_t num_chunks = static_cast<uint64_t>(kOuter / kChunkSize2);

const ScalableTag<T> d;
const size_t N = Lanes(d);
// Required for Stream loop, otherwise we might have partial vectors.
HWY_DASSERT(kChunkSize >= N);
HWY_DASSERT(kChunkSize2 >= N);
pool.Run(0, num_chunks,
[&](const uint64_t chunk, size_t /*thread*/) HWY_ATTR {
// MSVC workaround: duplicate to ensure constexpr.
Expand Down Expand Up @@ -126,7 +126,7 @@ HWY_NOINLINE void MatVecAddImpl(const T* HWY_RESTRICT mat,
hwy::FlushStream();

// Handle remainder rows which are not a multiple of the chunk size.
for (size_t r = num_chunks * kChunkSize; r < kOuter; ++r) {
for (size_t r = num_chunks * kChunkSize2; r < kOuter; ++r) {
auto sum0 = Zero(d);

const T* HWY_RESTRICT row = &mat[r * kInner];
Expand Down
2 changes: 2 additions & 0 deletions hwy/contrib/sort/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ cc_library(
":vxsort", # required if HAVE_VXSORT
"//:algo",
"//:hwy",
"//:nanobenchmark",
],
)

Expand All @@ -201,6 +202,7 @@ cc_library(
deps = [
"//:algo",
"//:hwy",
"//:nanobenchmark",
],
)

Expand Down
4 changes: 1 addition & 3 deletions hwy/contrib/sort/algo-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <functional> // std::less, std::greater
#include <vector>

#include "hwy/base.h"
#include "hwy/contrib/sort/vqsort.h"
#include "hwy/highway.h"
#include "hwy/print.h"
Expand Down Expand Up @@ -606,9 +607,6 @@ void Run(Algo algo, KeyType* inout, size_t num_keys, SharedState& shared,
return CallHeapPartialSort(inout, num_keys, k_keys, Order());
case Algo::kHeapSelect:
return CallHeapSelect(inout, num_keys, k_keys, Order());

default:
HWY_ABORT("Not implemented");
}
}

Expand Down
1 change: 0 additions & 1 deletion hwy/contrib/sort/bench_sort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,6 @@ enum class BenchmarkModes {
std::vector<size_t> SizesToBenchmark(BenchmarkModes mode) {
std::vector<size_t> sizes;
switch (mode) {
default:
case BenchmarkModes::kDefault:
#if HAVE_PARALLEL_IPS4O || SORT_100M
sizes.push_back(100 * 1000 * size_t{1000});
Expand Down
20 changes: 10 additions & 10 deletions hwy/contrib/sort/result-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@

#include "hwy/aligned_allocator.h"
#include "hwy/base.h"
#include "hwy/contrib/sort/order.h"
#include "hwy/per_target.h" // DispatchedTarget
#include "hwy/targets.h" // TargetName

Expand All @@ -51,16 +50,17 @@ static inline double SummarizeMeasurements(std::vector<double>& seconds) {

struct SortResult {
SortResult() {}
SortResult(const Algo algo, Dist dist, size_t num_keys, size_t num_threads,
double sec, size_t sizeof_key, const char* key_name)
SortResult(Algo algo_in, Dist dist_in, size_t num_keys_in,
size_t num_threads_in, double sec_in, size_t sizeof_key_in,
const char* key_name_in)
: target(DispatchedTarget()),
algo(algo),
dist(dist),
num_keys(num_keys),
num_threads(num_threads),
sec(sec),
sizeof_key(sizeof_key),
key_name(key_name) {}
algo(algo_in),
dist(dist_in),
num_keys(num_keys_in),
num_threads(num_threads_in),
sec(sec_in),
sizeof_key(sizeof_key_in),
key_name(key_name_in) {}

void Print() const {
const double bytes = static_cast<double>(num_keys) *
Expand Down
4 changes: 4 additions & 0 deletions hwy/contrib/sort/sort_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
#include <random>
#include <vector>

#if !defined(HWY_DISABLED_TARGETS) && HWY_IS_DEBUG_BUILD
#define HWY_DISABLED_TARGETS (HWY_SSE2 | HWY_SSSE3)
#endif

#include "hwy/aligned_allocator.h" // IsAligned
#include "hwy/base.h"
#include "hwy/contrib/sort/vqsort.h"
Expand Down
7 changes: 4 additions & 3 deletions hwy/contrib/sort/vqsort_f16a.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
// limitations under the License.

#include "hwy/contrib/sort/vqsort.h" // VQSort
#include "hwy/nanobenchmark.h" // Unpredictable1

#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_f16a.cc"
Expand All @@ -33,7 +34,7 @@ void SortF16Asc(float16_t* HWY_RESTRICT keys, const size_t num) {
#else
(void)keys;
(void)num;
HWY_ASSERT(0);
if (Unpredictable1()) HWY_ASSERT(0);
#endif
}

Expand All @@ -45,7 +46,7 @@ void PartialSortF16Asc(float16_t* HWY_RESTRICT keys, const size_t num,
(void)keys;
(void)num;
(void)k;
HWY_ASSERT(0);
if (Unpredictable1()) HWY_ASSERT(0);
#endif
}

Expand All @@ -57,7 +58,7 @@ void SelectF16Asc(float16_t* HWY_RESTRICT keys, const size_t num,
(void)keys;
(void)num;
(void)k;
HWY_ASSERT(0);
if (Unpredictable1()) HWY_ASSERT(0);
#endif
}

Expand Down
7 changes: 4 additions & 3 deletions hwy/contrib/sort/vqsort_f16d.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
// limitations under the License.

#include "hwy/contrib/sort/vqsort.h" // VQSort
#include "hwy/nanobenchmark.h" // Unpredictable1

#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_f16d.cc"
Expand All @@ -33,7 +34,7 @@ void SortF16Desc(float16_t* HWY_RESTRICT keys, const size_t num) {
#else
(void)keys;
(void)num;
HWY_ASSERT(0);
if (Unpredictable1()) HWY_ASSERT(0);
#endif
}

Expand All @@ -45,7 +46,7 @@ void PartialSortF16Desc(float16_t* HWY_RESTRICT keys, const size_t num,
(void)keys;
(void)num;
(void)k;
HWY_ASSERT(0);
if (Unpredictable1()) HWY_ASSERT(0);
#endif
}

Expand All @@ -57,7 +58,7 @@ void SelectF16Desc(float16_t* HWY_RESTRICT keys, const size_t num,
(void)keys;
(void)num;
(void)k;
HWY_ASSERT(0);
if (Unpredictable1()) HWY_ASSERT(0);
#endif
}

Expand Down
2 changes: 0 additions & 2 deletions hwy/contrib/thread_pool/spin.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,6 @@ static inline const char* ToString(SpinType type) {
return "Pause";
case SpinType::kSentinel:
return nullptr;
default:
HWY_UNREACHABLE;
}
}

Expand Down
6 changes: 2 additions & 4 deletions hwy/contrib/thread_pool/thread_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,6 @@ static inline const char* ToString(WaitType type) {
return "Separate";
case WaitType::kSentinel:
return nullptr;
default:
HWY_UNREACHABLE;
}
}

Expand Down Expand Up @@ -226,8 +224,8 @@ struct Config { // 4 bytes
return buf;
}

Config(SpinType spin_type, WaitType wait_type)
: spin_type(spin_type), wait_type(wait_type) {}
Config(SpinType spin_type_in, WaitType wait_type_in)
: spin_type(spin_type_in), wait_type(wait_type_in) {}
// Workers initially spin until ThreadPool sends them their actual config.
Config() : Config(SpinType::kPause, WaitType::kSpinSeparate) {}

Expand Down
9 changes: 5 additions & 4 deletions hwy/contrib/thread_pool/thread_pool_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -337,10 +337,11 @@ TEST(ThreadPoolTest, TestPool) {
mementos[task - begin].store(1000 + task);

// Re-entering Run is fine on a 0-worker pool.
inner.Run(begin, end, [begin, end](uint64_t task, size_t worker) {
HWY_ASSERT(worker == 0);
HWY_ASSERT(begin <= task && task < end);
});
inner.Run(begin, end,
[begin, end](uint64_t inner_task, size_t inner_worker) {
HWY_ASSERT(inner_worker == 0);
HWY_ASSERT(begin <= inner_task && inner_task < end);
});
};

for (size_t num_threads = 0; num_threads <= 6; num_threads += 3) {
Expand Down
2 changes: 1 addition & 1 deletion hwy/perf_counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class PerfCounters {
case kMigrations:
return "migration";
default:
HWY_ABORT("Bug: unknown counter %d", c);
HWY_UNREACHABLE;
}
}

Expand Down
22 changes: 14 additions & 8 deletions hwy/print.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ HWY_DLLEXPORT void TypeName(const TypeInfo& info, size_t N, char* string100) {
}
}

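// The NOLINT annotations below suppress the lint warning about passing the
// literal 100 instead of `sizeof(string100)`: `string100` is a pointer, so
// `sizeof` would yield the pointer size rather than the buffer length.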
HWY_DLLEXPORT void ToString(const TypeInfo& info, const void* ptr,
char* string100) {
if (info.sizeof_t == 1) {
Expand All @@ -52,12 +54,14 @@ HWY_DLLEXPORT void ToString(const TypeInfo& info, const void* ptr,
} else if (info.sizeof_t == 2) {
if (info.is_bf16) {
const double value = static_cast<double>(F32FromBF16Mem(ptr));
const char* fmt = hwy::ScalarAbs(value) < 1E-3 ? "%.3E" : "%.3f";
snprintf(string100, 100, fmt, value); // NOLINT
// NOLINTNEXTLINE
snprintf(string100, 100, hwy::ScalarAbs(value) < 1E-3 ? "%.3E" : "%.3f",
value);
} else if (info.is_float) {
const double value = static_cast<double>(F32FromF16Mem(ptr));
const char* fmt = hwy::ScalarAbs(value) < 1E-4 ? "%.4E" : "%.4f";
snprintf(string100, 100, fmt, value); // NOLINT
// NOLINTNEXTLINE
snprintf(string100, 100, hwy::ScalarAbs(value) < 1E-4 ? "%.4E" : "%.4f",
value);
} else {
uint16_t bits;
CopyBytes<2>(ptr, &bits);
Expand All @@ -67,8 +71,9 @@ HWY_DLLEXPORT void ToString(const TypeInfo& info, const void* ptr,
if (info.is_float) {
float value;
CopyBytes<4>(ptr, &value);
const char* fmt = hwy::ScalarAbs(value) < 1E-6 ? "%.9E" : "%.9f";
snprintf(string100, 100, fmt, static_cast<double>(value)); // NOLINT
// NOLINTNEXTLINE
snprintf(string100, 100, hwy::ScalarAbs(value) < 1E-6 ? "%.9E" : "%.9f",
static_cast<double>(value));
} else if (info.is_signed) {
int32_t value;
CopyBytes<4>(ptr, &value);
Expand All @@ -82,8 +87,9 @@ HWY_DLLEXPORT void ToString(const TypeInfo& info, const void* ptr,
if (info.is_float) {
double value;
CopyBytes<8>(ptr, &value);
const char* fmt = hwy::ScalarAbs(value) < 1E-9 ? "%.18E" : "%.18f";
snprintf(string100, 100, fmt, value); // NOLINT
// NOLINTNEXTLINE
snprintf(string100, 100, hwy::ScalarAbs(value) < 1E-9 ? "%.18E" : "%.18f",
value);
} else {
const uint8_t* ptr8 = reinterpret_cast<const uint8_t*>(ptr);
uint32_t lo, hi;
Expand Down
Loading