这是indexloc提供的服务,不要输入任何密码
Skip to content

Commit 7487ef9

Browse files
author
Simon Hosie
committed
drop in rvv variable-length-memset
1 parent 8770d93 commit 7487ef9

File tree

3 files changed

+30
-6
lines changed

3 files changed

+30
-6
lines changed

arch/riscv/chunkset_rvv.c

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,22 @@ do { \
2929
} while (len > 0); \
3030
} while (0)
3131

32+
static inline void* memcpy_rvv(void* dst, void const* src, size_t len) {
33+
// TODO: m1, m2, m4, or m8 might provide the best balance of predictable
34+
// branches against unnecessary operations. Tune this.
35+
do {
36+
size_t vl = __riscv_vsetvl_e8m2(len);
37+
vuint8m2_t chunk = __riscv_vle8_v_u8m2(src, vl);
38+
__riscv_vse8_v_u8m2(dst, chunk, vl);
39+
dst = (uint8_t*)dst + vl;
40+
src = (uint8_t*)src + vl;
41+
len -= vl;
42+
} while (len > 0);
43+
return dst;
44+
}
45+
46+
#define VARLEN_MEMCPY memcpy_rvv
47+
3248
/* We don't have a 32-byte datatype for RISC-V arch. */
3349
typedef struct chunk_s {
3450
uint64_t data[4];
@@ -93,14 +109,14 @@ static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len
93109
len -= align;
94110
ptrdiff_t dist = out - from;
95111
if (dist >= len) {
96-
memcpy(out, from, len);
112+
memcpy_rvv(out, from, len);
97113
out += len;
98114
from += len;
99115
return out;
100116
}
101117
if (dist >= sizeof(chunk_t)) {
102118
dist = (dist / sizeof(chunk_t)) * sizeof(chunk_t);
103-
memcpy(out, from, dist);
119+
memcpy_rvv(out, from, dist);
104120
out += dist;
105121
from += dist;
106122
len -= dist;

chunkset_tpl.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
extern uint8_t* chunkmemset_ssse3(uint8_t *out, unsigned dist, unsigned len);
1010
#endif
1111

12+
#if !defined VARLEN_MEMCPY
13+
#define VARLEN_MEMCPY memcpy
14+
#endif
15+
1216
/* Returns the chunk size */
1317
Z_INTERNAL uint32_t CHUNKSIZE(void) {
1418
return sizeof(chunk_t);
@@ -79,7 +83,7 @@ static inline chunk_t GET_CHUNK_MAG(uint8_t *buf, uint32_t *chunk_rem, uint32_t
7983
uint8_t *cur_chunk = (uint8_t *)&chunk_load;
8084
while (bytes_remaining) {
8185
cpy_dist = MIN(dist, bytes_remaining);
82-
memcpy(cur_chunk, buf, cpy_dist);
86+
VARLEN_MEMCPY(cur_chunk, buf, cpy_dist);
8387
bytes_remaining -= cpy_dist;
8488
cur_chunk += cpy_dist;
8589
/* This allows us to bypass an expensive integer division since we're effectively
@@ -161,7 +165,7 @@ Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) {
161165
}
162166

163167
if (len) {
164-
memcpy(out, &chunk_load, len);
168+
VARLEN_MEMCPY(out, &chunk_load, len);
165169
out += len;
166170
}
167171

inflate_p.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@
3939
# define INFLATE_GET_DICTIONARY_HOOK(strm, dict, dict_len) do {} while (0)
4040
#endif
4141

42+
#if !defined VARLEN_MEMCPY
43+
#define VARLEN_MEMCPY memcpy
44+
#endif
45+
4246
/*
4347
* Macros shared by inflate() and inflateBack()
4448
*/
@@ -157,7 +161,7 @@ static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, uint64_t len,
157161

158162
/* For all cases without overlap, memcpy is ideal */
159163
if (!(olap_src || olap_dst)) {
160-
memcpy(out, from, (size_t)len);
164+
VARLEN_MEMCPY(out, from, (size_t)len);
161165
return out + len;
162166
}
163167

@@ -173,7 +177,7 @@ static inline uint8_t* chunkcopy_safe(uint8_t *out, uint8_t *from, uint64_t len,
173177
* behind or lookahead distance. */
174178
uint64_t non_olap_size = llabs(from - out); // llabs vs labs for compatibility with windows
175179

176-
memcpy(out, from, (size_t)non_olap_size);
180+
VARLEN_MEMCPY(out, from, (size_t)non_olap_size);
177181
out += non_olap_size;
178182
from += non_olap_size;
179183
len -= non_olap_size;

0 commit comments

Comments
 (0)