diff --git a/arch/riscv/adler32_rvv.c b/arch/riscv/adler32_rvv.c index d0f9aaa567..d822d75af6 100644 --- a/arch/riscv/adler32_rvv.c +++ b/arch/riscv/adler32_rvv.c @@ -72,6 +72,7 @@ static inline uint32_t adler32_rvv_impl(uint32_t adler, uint8_t* restrict dst, c /* do modulo once each block of NMAX size */ if (++cnt >= nmax_limit) { v_adler32_prev_accu = __riscv_vremu_vx_u32m4(v_adler32_prev_accu, BASE, vl); + v_buf32_accu = __riscv_vremu_vx_u32m4(v_buf32_accu, BASE, vl); cnt = 0; } } @@ -99,16 +100,19 @@ static inline uint32_t adler32_rvv_impl(uint32_t adler, uint8_t* restrict dst, c vuint32m1_t v_sum2_sum = __riscv_vmv_s_x_u32m1(0, vl); v_sum2_sum = __riscv_vredsum_vs_u32m4_u32m1(v_sum32_accu, v_sum2_sum, vl); - uint32_t sum2_sum = __riscv_vmv_x_s_u32m1_u32(v_sum2_sum); + uint32_t sum2_sum = __riscv_vmv_x_s_u32m1_u32(v_sum2_sum) % BASE; - sum2 += (sum2_sum + adler * (len - left)); + sum2 += (sum2_sum + adler * ((len - left) % BASE)); vuint32m1_t v_adler_sum = __riscv_vmv_s_x_u32m1(0, vl); v_adler_sum = __riscv_vredsum_vs_u32m4_u32m1(v_buf32_accu, v_adler_sum, vl); - uint32_t adler_sum = __riscv_vmv_x_s_u32m1_u32(v_adler_sum); + uint32_t adler_sum = __riscv_vmv_x_s_u32m1_u32(v_adler_sum) % BASE; adler += adler_sum; + sum2 %= BASE; + adler %= BASE; + while (left--) { if (COPY) *dst++ = *src; adler += *src++;