Skip to content

Commit

Permalink
Implement brotli's version of lz_extend.
Browse files Browse the repository at this point in the history
This provides a 2-3% speedup for compression.

Before:

```
Compression             Total  Compressed        | Compression                   |
Method                   Size        Size  Ratio | Iters       Time         Rate |
----------------------------------------------------------------------------------
libdeflate-gzip:1   270062086    85630362  3.154 |    20   26.493 s  194.43 MB/s |
libdeflate-gzip:2   270062086    84037129  3.214 |    20   35.747 s  144.10 MB/s |
libdeflate-gzip:3   270062086    82391861  3.278 |    20   39.707 s  129.73 MB/s |
libdeflate-gzip:4   270062086    81420541  3.317 |    20   43.029 s  119.71 MB/s |
libdeflate-gzip:5   270062086    78832080  3.426 |    20   50.630 s  101.74 MB/s |
libdeflate-gzip:6   270062086    78021372  3.461 |    20   63.719 s   80.84 MB/s |
libdeflate-gzip:7   270062086    77594012  3.480 |    20   87.918 s   58.59 MB/s |
libdeflate-gzip:8   270062086    77190199  3.499 |    20  147.452 s   34.93 MB/s |
libdeflate-gzip:9   270062086    77156775  3.500 |    20  191.025 s   26.97 MB/s |
```

After:

```
Compression             Total  Compressed        | Compression                   |
Method                   Size        Size  Ratio | Iters       Time         Rate |
----------------------------------------------------------------------------------
libdeflate-gzip:1   270062086    85630362  3.154 |    20   26.228 s  196.39 MB/s |
libdeflate-gzip:2   270062086    84037129  3.214 |    20   34.950 s  147.38 MB/s |
libdeflate-gzip:3   270062086    82391861  3.278 |    20   39.140 s  131.61 MB/s |
libdeflate-gzip:4   270062086    81420541  3.317 |    20   41.927 s  122.86 MB/s |
libdeflate-gzip:5   270062086    78832080  3.426 |    20   50.023 s  102.97 MB/s |
libdeflate-gzip:6   270062086    78021372  3.461 |    20   61.799 s   83.35 MB/s |
libdeflate-gzip:7   270062086    77594012  3.480 |    20   85.528 s   60.23 MB/s |
libdeflate-gzip:8   270062086    77190199  3.499 |    20  145.867 s   35.31 MB/s |
libdeflate-gzip:9   270062086    77156775  3.500 |    20  189.208 s   27.22 MB/s |
```
  • Loading branch information
LucasSloan committed Mar 28, 2024
1 parent 275aa51 commit 32a7739
Showing 1 changed file with 25 additions and 37 deletions.
62 changes: 25 additions & 37 deletions lib/matchfinder_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,49 +176,37 @@ lz_hash(u32 seq, unsigned num_bits)
* to a maximum of @max_len. Initially, @start_len bytes are matched.
*/
/*
 * NOTE(review): the lines below are a flattened diff of lz_extend(). The
 * '-'/'+' markers and the indentation were lost, so the removed (old) and
 * added (new) implementations are interleaved and this span is not
 * compilable as-is.  The old/new/unchanged tags in the comments below are
 * inferred from which variables each line uses; they add up to the
 * "25 additions and 37 deletions" reported for this file.
 *
 * Both versions return the number of bytes that match between strptr and
 * matchptr, given that the first start_len bytes already match and that at
 * most max_len bytes may be compared.
 */
static forceinline unsigned
/* old signature (removed): pointers and max_len are const; data is indexed through 'len'. */
lz_extend(const u8 * const strptr, const u8 * const matchptr,
const unsigned start_len, const unsigned max_len)
/* new signature (added): pointers and max_len are mutable so the body can advance/decrement them in place. */
lz_extend(const u8 * strptr, const u8 * matchptr,
const unsigned start_len, unsigned max_len)
{
/* old (removed): running match length and the scratch word holding the XOR of the two inputs. */
unsigned len = start_len;
machine_word_t v_word;
/* new (added): remember where the match starts so the length can be recovered as a pointer difference. */
const u8 * const matchptr_orig = matchptr;

/* new (added): skip the bytes already known to match; max_len now counts the bytes still to compare. */
strptr += start_len;
matchptr += start_len;
max_len -= start_len;
/* unchanged: the word-at-a-time comparison is only used when unaligned loads are fast. */
if (UNALIGNED_ACCESS_IS_FAST) {

/* old (removed): when at least four words remain, compare four words back to back without re-checking the bound. */
if (likely(max_len - len >= 4 * WORDBYTES)) {

/* old (removed): one step XORs a word from each side; a nonzero result means a differing byte lies in that word. */
#define COMPARE_WORD_STEP \
v_word = load_word_unaligned(&matchptr[len]) ^ \
load_word_unaligned(&strptr[len]); \
if (v_word != 0) \
goto word_differs; \
len += WORDBYTES; \

COMPARE_WORD_STEP
COMPARE_WORD_STEP
COMPARE_WORD_STEP
COMPARE_WORD_STEP
#undef COMPARE_WORD_STEP
}

/* old (removed): keep comparing whole words while a full word still fits below max_len. */
while (len + WORDBYTES <= max_len) {
v_word = load_word_unaligned(&matchptr[len]) ^
load_word_unaligned(&strptr[len]);
if (v_word != 0)
goto word_differs;
len += WORDBYTES;
/* new (added): single word loop; max_len is the remaining byte budget and shrinks by one word per iteration. */
for (; max_len >= WORDBYTES; max_len -= WORDBYTES) {
/* new (added): XOR is zero exactly when the two words are equal. */
machine_word_t v_word = load_word_unaligned(strptr) ^
load_word_unaligned(matchptr);
strptr += WORDBYTES;
if (v_word != 0) {
/*
 * new (added): locate the first differing byte inside the word.
 * bsfw()/bsrw() are defined elsewhere -- presumably the index of the
 * lowest/highest set bit; confirm against their definitions.
 */
unsigned matching_bits;
if (CPU_IS_LITTLE_ENDIAN())
matching_bits = bsfw(v_word);
else
matching_bits = WORDBITS - 1 - bsrw(v_word);
/*
 * new (added): matchptr has not been advanced for this word yet, so the
 * pointer difference counts the bytes matched before it; '>> 3' turns the
 * matching bit count into whole bytes.
 */
return (unsigned)(matchptr - matchptr_orig) + (matching_bits >> 3);
}
matchptr += WORDBYTES;
/* unchanged: closes the word loop (old 'while' / new 'for'). */
}
/* unchanged: closes the UNALIGNED_ACCESS_IS_FAST block. */
}

/* old (removed): byte-at-a-time tail, then return the accumulated length. */
while (len < max_len && matchptr[len] == strptr[len])
len++;
return len;

/* old (removed): jump target for a word mismatch; adds the number of matching bytes within that word. */
word_differs:
if (CPU_IS_LITTLE_ENDIAN())
len += (bsfw(v_word) >> 3);
else
len += (WORDBITS - 1 - bsrw(v_word)) >> 3;
return len;
/* new (added): byte-at-a-time tail for whatever is left (and the only path when unaligned access is not fast). */
while (max_len && *matchptr == *strptr) {
max_len--;
++strptr;
++matchptr;
}
/* new (added): total match length, including the initial start_len bytes skipped above. */
return (unsigned)(matchptr - matchptr_orig);
}

#endif /* LIB_MATCHFINDER_COMMON_H */

0 comments on commit 32a7739

Please sign in to comment.