这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 19 additions & 30 deletions arch/x86/chunkset_avx512.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,20 +63,22 @@
_mm256_storeu_si256((__m256i *)out, *chunk);
}

/* Masked store of one chunk.
 *
 * out   - destination byte pointer handed to the store intrinsic.
 * mask  - per-byte write mask forwarded unchanged to the intrinsic.
 * chunk - pointer to the chunk value to be written; it is only read.
 *
 * The work is done entirely by _mm256_mask_storeu_epi8; per that intrinsic's
 * documented semantics only the byte lanes selected by mask are written.
 */
static inline void storechunk_mask(uint8_t *out, mask_t mask, chunk_t *chunk) {
    const chunk_t lanes = *chunk;

    _mm256_mask_storeu_epi8(out, mask, lanes);
}

static inline uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
Assert(len > 0, "chunkcopy should never have a length 0");

unsigned rem = len % sizeof(chunk_t);
mask_t rem_mask = gen_mask(rem);

/* Since this is only ever called if dist >= a chunk, we don't need a masked load */
chunk_t chunk;
uint32_t rem = len % sizeof(chunk_t);

Check warning on line 70 in arch/x86/chunkset_avx512.c

View check run for this annotation

Codecov / codecov/patch

arch/x86/chunkset_avx512.c#L70

Added line #L70 was not covered by tests

if (len < sizeof(chunk_t)) {
mask_t rem_mask = gen_mask(rem);
chunk = _mm256_maskz_loadu_epi8(rem_mask, from);
_mm256_mask_storeu_epi8(out, rem_mask, chunk);
return out + rem;

Check warning on line 76 in arch/x86/chunkset_avx512.c

View check run for this annotation

Codecov / codecov/patch

arch/x86/chunkset_avx512.c#L73-L76

Added lines #L73 - L76 were not covered by tests
}

loadchunk(from, &chunk);
_mm256_mask_storeu_epi8(out, rem_mask, chunk);
rem = (rem == 0) ? sizeof(chunk_t) : rem;
storechunk(out, &chunk);

Check warning on line 81 in arch/x86/chunkset_avx512.c

View check run for this annotation

Codecov / codecov/patch

arch/x86/chunkset_avx512.c#L81

Added line #L81 was not covered by tests
out += rem;
from += rem;
len -= rem;
Expand Down Expand Up @@ -123,10 +125,6 @@
return ret_vec;
}

/* Load one half chunk from memory.
 *
 * s     - source byte pointer; read through _mm_loadu_si128, the unaligned
 *         128-bit load, so no alignment is required of it here.
 * chunk - destination that receives the loaded value.
 */
static inline void loadhalfchunk(uint8_t const *s, halfchunk_t *chunk) {
    const __m128i *src = (const __m128i *)s;

    chunk[0] = _mm_loadu_si128(src);
}

/* Store one half chunk to memory.
 *
 * out   - destination byte pointer; written through _mm_storeu_si128, the
 *         unaligned 128-bit store, so no alignment is required of it here.
 * chunk - pointer to the half-chunk value to write; it is only read.
 */
static inline void storehalfchunk(uint8_t *out, halfchunk_t *chunk) {
    __m128i *dst = (__m128i *)out;

    _mm_storeu_si128(dst, chunk[0]);
}
Expand All @@ -152,27 +150,18 @@

static inline uint8_t* HALFCHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
Assert(len > 0, "chunkcopy should never have a length 0");

unsigned rem = len % sizeof(halfchunk_t);
halfmask_t rem_mask = gen_half_mask(rem);

/* Since this is only ever called if dist >= a chunk, we don't need a masked load */
halfchunk_t chunk;
loadhalfchunk(from, &chunk);
_mm_mask_storeu_epi8(out, rem_mask, chunk);
out += rem;
from += rem;
len -= rem;

while (len > 0) {
loadhalfchunk(from, &chunk);
storehalfchunk(out, &chunk);
out += sizeof(halfchunk_t);
from += sizeof(halfchunk_t);
len -= sizeof(halfchunk_t);
uint32_t rem = len % sizeof(halfchunk_t);

Check warning on line 155 in arch/x86/chunkset_avx512.c

View check run for this annotation

Codecov / codecov/patch

arch/x86/chunkset_avx512.c#L155

Added line #L155 was not covered by tests
if (rem == 0) {
rem = sizeof(halfchunk_t);

Check warning on line 157 in arch/x86/chunkset_avx512.c

View check run for this annotation

Codecov / codecov/patch

arch/x86/chunkset_avx512.c#L157

Added line #L157 was not covered by tests
}

return out;
halfmask_t rem_mask = gen_half_mask(rem);
chunk = _mm_maskz_loadu_epi8(rem_mask, from);
_mm_mask_storeu_epi8(out, rem_mask, chunk);

Check warning on line 162 in arch/x86/chunkset_avx512.c

View check run for this annotation

Codecov / codecov/patch

arch/x86/chunkset_avx512.c#L160-L162

Added lines #L160 - L162 were not covered by tests

return out + rem;

Check warning on line 164 in arch/x86/chunkset_avx512.c

View check run for this annotation

Codecov / codecov/patch

arch/x86/chunkset_avx512.c#L164

Added line #L164 was not covered by tests
}

#define CHUNKSIZE chunksize_avx512
Expand Down
4 changes: 0 additions & 4 deletions chunkset_tpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,11 +219,7 @@ static inline uint8_t* CHUNKMEMSET(uint8_t *out, uint8_t *from, unsigned len) {
rem_bytes:
#endif
if (len) {
#ifndef HAVE_MASKED_READWRITE
memcpy(out, &chunk_load, len);
#else
storechunk_mask(out, gen_mask(len), &chunk_load);
#endif
out += len;
}

Expand Down
Loading