这是indexloc提供的服务,不要输入任何密码
Skip to content

Commit c97c63e

Browse files
author
Simon Hosie
committed
Add adler32_fold_copy_rvv implementation.
1 parent 8770d93 commit c97c63e

File tree

3 files changed

+28
-9
lines changed

3 files changed

+28
-9
lines changed

arch/riscv/adler32_rvv.c

Lines changed: 24 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -12,23 +12,25 @@
1212
#include "../../zbuild.h"
1313
#include "../../adler32_p.h"
1414

15-
Z_INTERNAL uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len) {
15+
static inline uint32_t adler32_rvv_impl(uint32_t adler, uint8_t* restrict dst, const uint8_t *src, size_t len, int COPY) {
1616
/* split Adler-32 into component sums */
1717
uint32_t sum2 = (adler >> 16) & 0xffff;
1818
adler &= 0xffff;
1919

2020
/* in case user likes doing a byte at a time, keep it fast */
2121
if (len == 1) {
22-
return adler32_len_1(adler, buf, sum2);
22+
if (COPY) memcpy(dst, src, 1);
23+
return adler32_len_1(adler, src, sum2);
2324
}
2425

2526
/* initial Adler-32 value (deferred check for len == 1 speed) */
26-
if (buf == NULL)
27+
if (src == NULL)
2728
return 1L;
2829

2930
/* in case short lengths are provided, keep it somewhat fast */
3031
if (len < 16) {
31-
return adler32_len_16(adler, buf, len, sum2);
32+
if (COPY) memcpy(dst, src, len);
33+
return adler32_len_16(adler, src, len, sum2);
3234
}
3335

3436
size_t left = len;
@@ -56,10 +58,12 @@ Z_INTERNAL uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len)
5658
v_buf16_accu = __riscv_vmv_v_x_u16m2(0, vl);
5759
size_t subprob = block_size;
5860
while (subprob > 0) {
59-
vuint8m1_t v_buf8 = __riscv_vle8_v_u8m1(buf, vl);
61+
vuint8m1_t v_buf8 = __riscv_vle8_v_u8m1(src, vl);
62+
if (COPY) __riscv_vse8_v_u8m1(dst, v_buf8, vl);
6063
v_adler32_prev_accu = __riscv_vwaddu_wv_u32m4(v_adler32_prev_accu, v_buf16_accu, vl);
6164
v_buf16_accu = __riscv_vwaddu_wv_u16m2(v_buf16_accu, v_buf8, vl);
62-
buf += vl;
65+
src += vl;
66+
if (COPY) dst += vl;
6367
subprob -= vl;
6468
}
6569
v_adler32_prev_accu = __riscv_vmacc_vx_u32m4(v_adler32_prev_accu, block_size / vl, v_buf32_accu, vl);
@@ -75,10 +79,12 @@ Z_INTERNAL uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len)
7579
v_buf16_accu = __riscv_vmv_v_x_u16m2(0, vl);
7680
size_t res = left;
7781
while (left >= vl) {
78-
vuint8m1_t v_buf8 = __riscv_vle8_v_u8m1(buf, vl);
82+
vuint8m1_t v_buf8 = __riscv_vle8_v_u8m1(src, vl);
83+
if (COPY) __riscv_vse8_v_u8m1(dst, v_buf8, vl);
7984
v_adler32_prev_accu = __riscv_vwaddu_wv_u32m4(v_adler32_prev_accu, v_buf16_accu, vl);
8085
v_buf16_accu = __riscv_vwaddu_wv_u16m2(v_buf16_accu, v_buf8, vl);
81-
buf += vl;
86+
src += vl;
87+
if (COPY) dst += vl;
8288
left -= vl;
8389
}
8490
v_adler32_prev_accu = __riscv_vmacc_vx_u32m4(v_adler32_prev_accu, res / vl, v_buf32_accu, vl);
@@ -104,7 +110,8 @@ Z_INTERNAL uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len)
104110
adler += adler_sum;
105111

106112
while (left--) {
107-
adler += *buf++;
113+
if (COPY) *dst++ = *src;
114+
adler += *src++;
108115
sum2 += adler;
109116
}
110117

@@ -114,4 +121,12 @@ Z_INTERNAL uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len)
114121
return adler | (sum2 << 16);
115122
}
116123

124+
Z_INTERNAL uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
125+
return adler32_rvv_impl(adler, dst, src, len, 1);
126+
}
127+
128+
Z_INTERNAL uint32_t adler32_rvv(uint32_t adler, const uint8_t *buf, size_t len) {
129+
return adler32_rvv_impl(adler, NULL, buf, len, 0);
130+
}
131+
117132
#endif // RISCV_RVV

cpu_features.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,9 @@ extern uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len);
7070
#endif
7171

7272
/* adler32 folding */
73+
#ifdef RISCV_RVV
74+
extern uint32_t adler32_fold_copy_rvv(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
75+
#endif
7376
#ifdef X86_SSE42
7477
extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
7578
#endif

functable.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -215,6 +215,7 @@ static void init_functable(void) {
215215
#ifdef RISCV_RVV
216216
if (cf.riscv.has_rvv) {
217217
ft.adler32 = &adler32_rvv;
218+
ft.adler32_fold_copy = &adler32_fold_copy_rvv;
218219
ft.chunkmemset_safe = &chunkmemset_safe_rvv;
219220
ft.chunksize = &chunksize_rvv;
220221
ft.compare256 = &compare256_rvv;

0 commit comments

Comments
 (0)