From 8ae9de699784defb318cc4f6250eccaed9b44983 Mon Sep 17 00:00:00 2001 From: bytegrrrl Date: Sun, 13 Apr 2025 08:19:45 -0400 Subject: [PATCH 1/6] Add pure FixedPoint sqrt implementation and tests --- libopenage/util/fixed_point.h | 32 ++++++++++++++++++++++- libopenage/util/fixed_point_test.cpp | 38 ++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/libopenage/util/fixed_point.h b/libopenage/util/fixed_point.h index 2d6a7eb084..337c3ceb4e 100644 --- a/libopenage/util/fixed_point.h +++ b/libopenage/util/fixed_point.h @@ -3,6 +3,7 @@ #pragma once #include +#include #include #include #include @@ -446,8 +447,37 @@ class FixedPoint { return is; } + /** + * Pure FixedPoint sqrt implementation using Heron's Algorithm. + * + * Note that this function is undefined for negative values. + */ constexpr double sqrt() { - return std::sqrt(this->to_double()); + // Zero can cause issues later, so deal with now. + if (this->raw_value == 0) { + return 0.0; + } + + // A greater shift = more precision, but can overflow the intermediate type if too large. + size_t max_shift = std::countl_zero((unsigned_intermediate_type)this->raw_value) - 1; + size_t shift = max_shift > fractional_bits ? fractional_bits : max_shift; + shift &= ~1; + + // We can't use the safe shift since the shift value is unknown at compile time. + intermediate_type n = (intermediate_type)this->raw_value << shift; + intermediate_type guess = (intermediate_type)1 << fractional_bits; + + for (size_t _i = 0; _i < fractional_bits; _i++) { + intermediate_type prev = guess; + guess = (guess + n / guess) / 2; + if (guess == prev) + break; + } + + // The sqrt operation halves the number of bits, so we'll we'll have to calculate a shift back + size_t unshift = fractional_bits - (shift + fractional_bits) / 2; + + return from_raw_value(guess << unshift).to_double(); } constexpr double atan2(const FixedPoint &n) { diff --git a/libopenage/util/fixed_point_test.cpp b/libopenage/util/fixed_point_test.cpp index 21fd788259..f5951ac267 100644 --- a/libopenage/util/fixed_point_test.cpp +++ b/libopenage/util/fixed_point_test.cpp @@ -156,6 +156,44 @@ void fixed_point() { TESTEQUALS_FLOAT((c/b).to_double(), -4.75/3.5, 0.1); } + // Pure FixedPoint sqrt tests + { + using T = FixedPoint; + TESTEQUALS_FLOAT(T(41231.131).sqrt(), 203.0545025356, 1e-7); + TESTEQUALS_FLOAT(T(547965.116).sqrt(), 740.2466588915, 1e-7); + + TESTEQUALS_FLOAT(T(2).sqrt(), T::sqrt_2(), 1e-9); + TESTEQUALS_FLOAT(2 / T::pi().sqrt(), T::inv2_sqrt_pi(), 1e-9); + + // Powers of two (anything over 2^15 will overflow (2^16)^2 = 2^32 >). + for (size_t i = 0; i < 15; i++) { + int64_t value = 1 << i; + TESTEQUALS_FLOAT(T(value * value).sqrt(), value, 1e-7); + } + + for (size_t i = 0; i < 100; i++) { + double value = 14.25 * i; + TESTEQUALS_FLOAT(T(value * value).sqrt(), value, 1e-7); + } + + // This one can go up to 2^63, but that would take years. + for (uint32_t i = 0; i < (1u << 16); i++) { + T value = T::from_raw_value(i * i); + TESTEQUALS_FLOAT(value.sqrt(), std::sqrt(value.to_double()), 1e-7); + } + + // We lose some precision when raw_type == intermediate_type + for (uint64_t i = 1; i < (1ul << 63); i = (i << 1) ^ i) { + T value = T::from_raw_value(i * i); + TESTEQUALS_FLOAT(value.sqrt(), std::sqrt(value.to_double()), 1e-4); + } + + using FP16_16 = FixedPoint; + for (uint32_t i = 0; i < (1u << 16); i++) { + FP16_16 value = FP16_16::from_raw_value(i); + TESTEQUALS_FLOAT(value.sqrt(), std::sqrt(value.to_double()), 1e-4); + } + } } }}} // openage::util::tests From 9890d12319de2c4a5425895d00a06f1b2fc511c0 Mon Sep 17 00:00:00 2001 From: bytegrrrl Date: Sun, 13 Apr 2025 19:26:52 -0400 Subject: [PATCH 2/6] Update copying.md --- copying.md | 1 + 1 file changed, 1 insertion(+) diff --git a/copying.md b/copying.md index d8430fd215..8c61b294e2 100644 --- a/copying.md +++ b/copying.md @@ -166,6 +166,7 @@ _the openage authors_ are: | Eelco Empting | Eeelco | me à eelco dawt de | | Jordan Sutton | jsutCodes | jsutcodes à gmail dawt com | | Daniel Wieczorek | Danio | danielwieczorek96 à gmail dawt com | +| | bytegrrrl | bytegrrrl à proton dawt me | If you're a first-time committer, add yourself to the above list. This is not just for legal reasons, but also to keep an overview of all those nicknames. From a51a90e3b1ef56c7167e4fa1ac19f14b3be0bda4 Mon Sep 17 00:00:00 2001 From: bytegrrrl Date: Mon, 21 Apr 2025 03:13:39 -0400 Subject: [PATCH 3/6] Update FixedPoint::sqrt() to return a FixedPoint --- libopenage/util/fixed_point.h | 32 ++++++++++++++++++++-------- libopenage/util/fixed_point_test.cpp | 18 +++++++++++----- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/libopenage/util/fixed_point.h b/libopenage/util/fixed_point.h index 337c3ceb4e..6cab0167f7 100644 --- a/libopenage/util/fixed_point.h +++ b/libopenage/util/fixed_point.h @@ -451,33 +451,47 @@ class FixedPoint { * Pure FixedPoint sqrt implementation using Heron's Algorithm. * * Note that this function is undefined for negative values. + * + * There's a small loss in precision depending on the value of fractional_bits and the position of + * the most significant bit: if the integer portion is very large, we won't have as much (absolute) + * precision. Ideally you would want the intermediate_type to be twice the size of raw_type to avoid + * any losses. */ - constexpr double sqrt() { + constexpr FixedPoint sqrt() { // Zero can cause issues later, so deal with now. if (this->raw_value == 0) { return 0.0; } + // Check for negative values + ENSURE(std::is_unsigned() or std::is_signed() and this->raw_value > 0, "FixedPoint::sqrt() is undefined for negative values."); + // A greater shift = more precision, but can overflow the intermediate type if too large. - size_t max_shift = std::countl_zero((unsigned_intermediate_type)this->raw_value) - 1; + size_t max_shift = std::countl_zero(static_cast(this->raw_value)) - 1; size_t shift = max_shift > fractional_bits ? fractional_bits : max_shift; - shift &= ~1; + + // shift + fractional bits must be an even number + if ((shift + fractional_bits) % 2) { + shift -= 1; + } // We can't use the safe shift since the shift value is unknown at compile time. - intermediate_type n = (intermediate_type)this->raw_value << shift; - intermediate_type guess = (intermediate_type)1 << fractional_bits; + intermediate_type n = static_cast(this->raw_value) << shift; + intermediate_type guess = static_cast(1) << fractional_bits; - for (size_t _i = 0; _i < fractional_bits; _i++) { + for (size_t i = 0; i < fractional_bits; i++) { intermediate_type prev = guess; guess = (guess + n / guess) / 2; - if (guess == prev) + if (guess == prev) { break; + + } } // The sqrt operation halves the number of bits, so we'll we'll have to calculate a shift back size_t unshift = fractional_bits - (shift + fractional_bits) / 2; - return from_raw_value(guess << unshift).to_double(); + return from_raw_value(guess << unshift); } constexpr double atan2(const FixedPoint &n) { @@ -604,7 +618,7 @@ namespace std { template constexpr double sqrt(openage::util::FixedPoint n) { - return n.sqrt(); + return static_cast(n.sqrt()); } template diff --git a/libopenage/util/fixed_point_test.cpp b/libopenage/util/fixed_point_test.cpp index f5951ac267..64ba8f6b2c 100644 --- a/libopenage/util/fixed_point_test.cpp +++ b/libopenage/util/fixed_point_test.cpp @@ -163,7 +163,7 @@ void fixed_point() { TESTEQUALS_FLOAT(T(547965.116).sqrt(), 740.2466588915, 1e-7); TESTEQUALS_FLOAT(T(2).sqrt(), T::sqrt_2(), 1e-9); - TESTEQUALS_FLOAT(2 / T::pi().sqrt(), T::inv2_sqrt_pi(), 1e-9); + TESTEQUALS_FLOAT(2 / std::sqrt(T::pi()), T::inv2_sqrt_pi(), 1e-9); // Powers of two (anything over 2^15 will overflow (2^16)^2 = 2^32 >). for (size_t i = 0; i < 15; i++) { @@ -177,23 +177,31 @@ void fixed_point() { } // This one can go up to 2^63, but that would take years. - for (uint32_t i = 0; i < (1u << 16); i++) { + for (uint32_t i = 0; i < 65536; i++) { T value = T::from_raw_value(i * i); TESTEQUALS_FLOAT(value.sqrt(), std::sqrt(value.to_double()), 1e-7); } // We lose some precision when raw_type == intermediate_type - for (uint64_t i = 1; i < (1ul << 63); i = (i << 1) ^ i) { + for (uint64_t i = 1; i < std::numeric_limits::max(); i = (i * 2) ^ i) { T value = T::from_raw_value(i * i); + if (value < 0) { + value = -value; + } TESTEQUALS_FLOAT(value.sqrt(), std::sqrt(value.to_double()), 1e-4); } using FP16_16 = FixedPoint; - for (uint32_t i = 0; i < (1u << 16); i++) { + for (uint32_t i = 0; i < 65536; i++) { FP16_16 value = FP16_16::from_raw_value(i); TESTEQUALS_FLOAT(value.sqrt(), std::sqrt(value.to_double()), 1e-4); } + + + // Test with negative number + TESTTHROWS((FixedPoint::from_float(-3.25).sqrt())); + TESTNOEXCEPT((FixedPoint::from_float(3.25).sqrt())); + TESTNOEXCEPT((FixedPoint::from_float(-3.25).sqrt())); } } - }}} // openage::util::tests From 4a7d664ce0104ef3b966f15c5b1f90329b7099dc Mon Sep 17 00:00:00 2001 From: bytegrrrl Date: Mon, 21 Apr 2025 04:55:08 -0400 Subject: [PATCH 4/6] Fix sqrt test cases --- libopenage/util/fixed_point.h | 6 +++--- libopenage/util/fixed_point_test.cpp | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/libopenage/util/fixed_point.h b/libopenage/util/fixed_point.h index 6cab0167f7..2b3a97797a 100644 --- a/libopenage/util/fixed_point.h +++ b/libopenage/util/fixed_point.h @@ -460,11 +460,11 @@ class FixedPoint { constexpr FixedPoint sqrt() { // Zero can cause issues later, so deal with now. if (this->raw_value == 0) { - return 0.0; + return zero(); } - // Check for negative values - ENSURE(std::is_unsigned() or std::is_signed() and this->raw_value > 0, "FixedPoint::sqrt() is undefined for negative values."); + // Check for negative values + ENSURE(std::is_unsigned() or std::is_signed() and this->raw_value > 0, "FixedPoint::sqrt() is undefined for negative values."); // A greater shift = more precision, but can overflow the intermediate type if too large. size_t max_shift = std::countl_zero(static_cast(this->raw_value)) - 1; diff --git a/libopenage/util/fixed_point_test.cpp b/libopenage/util/fixed_point_test.cpp index 64ba8f6b2c..b3690b8d15 100644 --- a/libopenage/util/fixed_point_test.cpp +++ b/libopenage/util/fixed_point_test.cpp @@ -192,7 +192,7 @@ void fixed_point() { } using FP16_16 = FixedPoint; - for (uint32_t i = 0; i < 65536; i++) { + for (uint32_t i = 1; i < 65536; i++) { FP16_16 value = FP16_16::from_raw_value(i); TESTEQUALS_FLOAT(value.sqrt(), std::sqrt(value.to_double()), 1e-4); } @@ -204,4 +204,5 @@ void fixed_point() { TESTNOEXCEPT((FixedPoint::from_float(-3.25).sqrt())); } } + }}} // openage::util::tests From 55196f1f92492794a12423ece919d6153f66a852 Mon Sep 17 00:00:00 2001 From: bytegrrrl Date: Tue, 22 Apr 2025 00:34:11 -0400 Subject: [PATCH 5/6] Fix formatting --- libopenage/util/fixed_point.h | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/libopenage/util/fixed_point.h b/libopenage/util/fixed_point.h index 2b3a97797a..d8d54e3550 100644 --- a/libopenage/util/fixed_point.h +++ b/libopenage/util/fixed_point.h @@ -451,11 +451,11 @@ class FixedPoint { * Pure FixedPoint sqrt implementation using Heron's Algorithm. * * Note that this function is undefined for negative values. - * - * There's a small loss in precision depending on the value of fractional_bits and the position of - * the most significant bit: if the integer portion is very large, we won't have as much (absolute) - * precision. Ideally you would want the intermediate_type to be twice the size of raw_type to avoid - * any losses. + * + * There's a small loss in precision depending on the value of fractional_bits and the position of + * the most significant bit: if the integer portion is very large, we won't have as much (absolute) + * precision. Ideally you would want the intermediate_type to be twice the size of raw_type to avoid + * any losses. */ constexpr FixedPoint sqrt() { // Zero can cause issues later, so deal with now. @@ -464,16 +464,16 @@ class FixedPoint { } // Check for negative values - ENSURE(std::is_unsigned() or std::is_signed() and this->raw_value > 0, "FixedPoint::sqrt() is undefined for negative values."); + ENSURE(std::is_unsigned() or (std::is_signed() and this->raw_value > 0), "FixedPoint::sqrt() is undefined for negative values."); // A greater shift = more precision, but can overflow the intermediate type if too large. size_t max_shift = std::countl_zero(static_cast(this->raw_value)) - 1; size_t shift = max_shift > fractional_bits ? fractional_bits : max_shift; - // shift + fractional bits must be an even number - if ((shift + fractional_bits) % 2) { - shift -= 1; - } + // shift + fractional bits must be an even number + if ((shift + fractional_bits) % 2) { + shift -= 1; + } // We can't use the safe shift since the shift value is unknown at compile time. intermediate_type n = static_cast(this->raw_value) << shift; @@ -484,8 +484,7 @@ class FixedPoint { guess = (guess + n / guess) / 2; if (guess == prev) { break; - - } + } } // The sqrt operation halves the number of bits, so we'll we'll have to calculate a shift back From 292d6dadd5ecb7977aad6f4ba0b615ac4cc0fac4 Mon Sep 17 00:00:00 2001 From: bytegrrrl Date: Wed, 23 Apr 2025 18:20:19 -0400 Subject: [PATCH 6/6] Optimize FixedPoint::sqrt() signedness check Co-authored-by: Christoph Heine <6852422+heinezen@users.noreply.github.com> --- libopenage/util/fixed_point.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libopenage/util/fixed_point.h b/libopenage/util/fixed_point.h index d8d54e3550..c751238cdf 100644 --- a/libopenage/util/fixed_point.h +++ b/libopenage/util/fixed_point.h @@ -464,7 +464,9 @@ class FixedPoint { } // Check for negative values - ENSURE(std::is_unsigned() or (std::is_signed() and this->raw_value > 0), "FixedPoint::sqrt() is undefined for negative values."); + if constexpr (std::is_signed()) { + ENSURE(this->raw_value > 0, "FixedPoint::sqrt() is undefined for negative values."); + } // A greater shift = more precision, but can overflow the intermediate type if too large. size_t max_shift = std::countl_zero(static_cast(this->raw_value)) - 1;