1212#include "../../zbuild.h"
1313#include "../../adler32_p.h"
1414
15- Z_INTERNAL uint32_t adler32_rvv (uint32_t adler , const uint8_t * buf , size_t len ) {
15+ static inline uint32_t adler32_rvv_impl (uint32_t adler , uint8_t * restrict dst , const uint8_t * src , size_t len , int COPY ) {
1616 /* split Adler-32 into component sums */
1717 uint32_t sum2 = (adler >> 16 ) & 0xffff ;
1818 adler &= 0xffff ;
1919
2020 /* in case user likes doing a byte at a time, keep it fast */
2121 if (len == 1 ) {
22- return adler32_len_1 (adler , buf , sum2 );
22+ if (COPY ) memcpy (dst , src , 1 );
23+ return adler32_len_1 (adler , src , sum2 );
2324 }
2425
2526 /* initial Adler-32 value (deferred check for len == 1 speed) */
26- if (buf == NULL )
27+ if (src == NULL )
2728 return 1L ;
2829
2930 /* in case short lengths are provided, keep it somewhat fast */
3031 if (len < 16 ) {
31- return adler32_len_16 (adler , buf , len , sum2 );
32+ if (COPY ) memcpy (dst , src , len );
33+ return adler32_len_16 (adler , src , len , sum2 );
3234 }
3335
3436 size_t left = len ;
@@ -56,10 +58,12 @@ Z_INTERNAL uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len)
5658 v_buf16_accu = __riscv_vmv_v_x_u16m2 (0 , vl );
5759 size_t subprob = block_size ;
5860 while (subprob > 0 ) {
59- vuint8m1_t v_buf8 = __riscv_vle8_v_u8m1 (buf , vl );
61+ vuint8m1_t v_buf8 = __riscv_vle8_v_u8m1 (src , vl );
62+ if (COPY ) __riscv_vse8_v_u8m1 (dst , v_buf8 , vl );
6063 v_adler32_prev_accu = __riscv_vwaddu_wv_u32m4 (v_adler32_prev_accu , v_buf16_accu , vl );
6164 v_buf16_accu = __riscv_vwaddu_wv_u16m2 (v_buf16_accu , v_buf8 , vl );
62- buf += vl ;
65+ src += vl ;
66+ if (COPY ) dst += vl ;
6367 subprob -= vl ;
6468 }
6569 v_adler32_prev_accu = __riscv_vmacc_vx_u32m4 (v_adler32_prev_accu , block_size / vl , v_buf32_accu , vl );
@@ -75,10 +79,12 @@ Z_INTERNAL uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len)
7579 v_buf16_accu = __riscv_vmv_v_x_u16m2 (0 , vl );
7680 size_t res = left ;
7781 while (left >= vl ) {
78- vuint8m1_t v_buf8 = __riscv_vle8_v_u8m1 (buf , vl );
82+ vuint8m1_t v_buf8 = __riscv_vle8_v_u8m1 (src , vl );
83+ if (COPY ) __riscv_vse8_v_u8m1 (dst , v_buf8 , vl );
7984 v_adler32_prev_accu = __riscv_vwaddu_wv_u32m4 (v_adler32_prev_accu , v_buf16_accu , vl );
8085 v_buf16_accu = __riscv_vwaddu_wv_u16m2 (v_buf16_accu , v_buf8 , vl );
81- buf += vl ;
86+ src += vl ;
87+ if (COPY ) dst += vl ;
8288 left -= vl ;
8389 }
8490 v_adler32_prev_accu = __riscv_vmacc_vx_u32m4 (v_adler32_prev_accu , res / vl , v_buf32_accu , vl );
@@ -104,7 +110,8 @@ Z_INTERNAL uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len)
104110 adler += adler_sum ;
105111
106112 while (left -- ) {
107- adler += * buf ++ ;
113+ if (COPY ) * dst ++ = * src ;
114+ adler += * src ++ ;
108115 sum2 += adler ;
109116 }
110117
@@ -114,4 +121,12 @@ Z_INTERNAL uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len)
114121 return adler | (sum2 << 16 );
115122}
116123
/* Checksum-and-copy entry point: forwards to adler32_rvv_impl with the COPY
 * argument set to 1. In the parts of the implementation visible here, a
 * non-zero COPY makes every byte read from `src` also get stored to `dst`
 * (memcpy on the short-length paths, a vector store in the vector loops, and
 * a byte store in the scalar tail), so `dst` must have room for `len` bytes.
 *
 *   adler - running Adler-32 value to continue from
 *   dst   - destination buffer that receives the copied bytes
 *   src   - input bytes to checksum
 *   len   - number of bytes to process
 *
 * Returns whatever adler32_rvv_impl returns for these arguments.
 */
124+ Z_INTERNAL uint32_t adler32_fold_copy_rvv (uint32_t adler , uint8_t * dst , const uint8_t * src , size_t len ) {
125+ return adler32_rvv_impl (adler , dst , src , len , 1 );
126+ }
127+
/* Checksum-only entry point: forwards to adler32_rvv_impl with a NULL
 * destination and the COPY argument set to 0. Every store to the destination
 * in the visible parts of the implementation is guarded by COPY, so the NULL
 * pointer is not written through on those paths.
 *
 *   adler - running Adler-32 value to continue from
 *   buf   - input bytes to checksum
 *   len   - number of bytes to process
 *
 * Returns whatever adler32_rvv_impl returns for these arguments.
 */
128+ Z_INTERNAL uint32_t adler32_rvv (uint32_t adler , const uint8_t * buf , size_t len ) {
129+ return adler32_rvv_impl (adler , NULL , buf , len , 0 );
130+ }
131+
117132#endif // RISCV_RVV
0 commit comments