-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Improve compression performance by conditionally skipping lazy evaluation #4533
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,8 +31,15 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV | |
| { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ | ||
| { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ | ||
| { 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */ | ||
| #ifdef ZSTD_ENABLE_COMPRESS_FAST | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is too disruptive, and will make maintenance of these tables more complex for future patches. |
||
| /* Larger windows and settings from next levels to improve ratio. | ||
| * Accounts for ratio drop from selective lazy evaluation */ | ||
| { 23, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 6 */ | ||
| { 23, 19, 20, 4, 5, 16, ZSTD_lazy }, /* level 7 */ | ||
| #else | ||
| { 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */ | ||
| { 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */ | ||
| #endif | ||
| { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */ | ||
| { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ | ||
| { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 10 */ | ||
|
|
@@ -57,8 +64,13 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV | |
| { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ | ||
| { 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/ | ||
| { 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/ | ||
| #ifdef ZSTD_ENABLE_COMPRESS_FAST | ||
| { 20, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 6 */ | ||
| { 20, 18, 19, 4, 5, 8, ZSTD_lazy }, /* level 7 */ | ||
| #else | ||
| { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ | ||
| { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ | ||
| #endif | ||
| { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ | ||
| { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ | ||
| { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ | ||
|
|
@@ -83,8 +95,13 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV | |
| { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ | ||
| { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ | ||
| { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ | ||
| #ifdef ZSTD_ENABLE_COMPRESS_FAST | ||
| { 19, 16, 17, 3, 4, 8, ZSTD_lazy }, /* level 6 */ | ||
| { 19, 16, 17, 4, 5, 16, ZSTD_lazy }, /* level 7 */ | ||
| #else | ||
| { 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ | ||
| { 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ | ||
| #endif | ||
| { 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ | ||
| { 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ | ||
| { 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5260,12 +5260,22 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, | |
| ZSTD_buffered_policy_e zbuff) | ||
| { | ||
| size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize; | ||
| #ifdef ZSTD_ENABLE_COMPRESS_FAST | ||
| int compressionLevel = 0; | ||
| #endif | ||
| #if ZSTD_TRACE | ||
| cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0; | ||
| #endif | ||
| DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); | ||
| /* params are supposed to be fully validated at this point */ | ||
| assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); | ||
| #ifdef ZSTD_ENABLE_COMPRESS_FAST | ||
| compressionLevel = (params->compressionLevel == 0) ? cctx->requestedParams.compressionLevel | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. create a block which both starts and end |
||
| : params->compressionLevel; | ||
| compressionLevel = MIN(MAX(0, compressionLevel), ZSTD_maxCLevel()); | ||
| assert(compressionLevel >= 0 && compressionLevel <= ZSTD_MAX_CLEVEL); | ||
| cctx->seqStore.lazyLimit = ZSTD_compressFastLazyLimit[compressionLevel]; | ||
| #endif | ||
| assert(!((dict) && (cdict))); /* either dict or cdict, not both */ | ||
| if ( (cdict) | ||
| && (cdict->dictContentSize > 0) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -39,6 +39,38 @@ | |
| The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy. | ||
| This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ | ||
|
|
||
| #ifdef ZSTD_ENABLE_COMPRESS_FAST | ||
| /* Lazy evaluation is performed only if the first match length | ||
| * is <= ZSTD_compressFastLazyLimit */ | ||
| #define ZSTD_COMPRESS_FAST_BASE_LAZY_LIMIT 5 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That seems way too low. In fact, in many cases, the search is targeting 5-byte matches, so all matches would trigger this condition, effectively changing |
||
|
|
||
| #define ZSTD_MAX_CLEVEL 22 | ||
| static const size_t ZSTD_compressFastLazyLimit[ZSTD_MAX_CLEVEL + 1] = { | ||
| ZSTD_COMPRESS_FAST_BASE_LAZY_LIMIT, /* base for negative levels */ | ||
| 0, /* level 1 */ | ||
| 0, /* level 2 */ | ||
| 0, /* level 3 */ | ||
| 0, /* level 4 */ | ||
| 0, /* level 5 */ | ||
| ZSTD_COMPRESS_FAST_BASE_LAZY_LIMIT, /* level 6 */ | ||
| ZSTD_COMPRESS_FAST_BASE_LAZY_LIMIT + 1, /* level 7 */ | ||
| 0, /* level 8.*/ | ||
| 0, /* level 9.*/ | ||
| 0, /* level 10.*/ | ||
| 0, /* level 11.*/ | ||
| 0, /* level 12.*/ | ||
| 0, /* level 13 */ | ||
| 0, /* level 14 */ | ||
| 0, /* level 15 */ | ||
| 0, /* level 16 */ | ||
| 0, /* level 17 */ | ||
| 0, /* level 18 */ | ||
| 0, /* level 19 */ | ||
| 0, /* level 20 */ | ||
| 0, /* level 21 */ | ||
| 0, /* level 22 */ | ||
| }; | ||
| #endif | ||
|
|
||
| /*-************************************* | ||
| * Context memory management | ||
|
|
@@ -105,6 +137,9 @@ typedef struct { | |
| BYTE* ofCode; | ||
| size_t maxNbSeq; | ||
| size_t maxNbLit; | ||
| #ifdef ZSTD_ENABLE_COMPRESS_FAST | ||
| size_t lazyLimit; /* Match length limit to allow lazy evaluation */ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is unwelcome, I would prefer we avoid adding a conditional variable here. |
||
| #endif | ||
|
|
||
| /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength | ||
| * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1590,6 +1590,9 @@ size_t ZSTD_compressBlock_lazy_generic( | |
| prefixLowestIndex - (U32)(dictEnd - dictBase) : | ||
| 0; | ||
| const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); | ||
| #ifdef ZSTD_ENABLE_COMPRESS_FAST | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. too many such |
||
| size_t lazyLimit = seqStore->lazyLimit; | ||
| #endif | ||
|
|
||
| DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod); | ||
| ip += (dictAndPrefixLength == 0); | ||
|
|
@@ -1669,7 +1672,11 @@ size_t ZSTD_compressBlock_lazy_generic( | |
| } | ||
|
|
||
| /* let's try to find a better solution */ | ||
| #ifdef ZSTD_ENABLE_COMPRESS_FAST | ||
| if ((lazyLimit == 0 || matchLength <= lazyLimit) && depth >= 1) /* restrict lazy eval to short matches only */ | ||
| #else | ||
| if (depth>=1) | ||
| #endif | ||
| while (ip<ilimit) { | ||
| DEBUGLOG(7, "search depth 1"); | ||
| ip ++; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -40,7 +40,7 @@ else | |
| hasMT="true" | ||
| fi | ||
|
|
||
| if zstd -vv --version | grep -q 'non-deterministic'; then | ||
| if zstd -vv --version | grep -q 'non-deterministic' || [ "$ZSTD_COMPRESS_FAST" = "1" ]; then | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removing determinism is pretty wild, though I guess it might be acceptable if this setting must be explicitly opt-in. |
||
| NON_DETERMINISTIC="true" | ||
| else | ||
| NON_DETERMINISTIC="" | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The macro name is not specific enough. The name implies that this is about the "fast mode", while the changes are actually impacting the
ZSTD_lazystrategy. Consider a more accurate name, or better, if we can find just a better trade off, maybe there would be no need for another build macro.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I also see that there is a confusion between a C macro and a Makefile macro.
They are not the same.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, we will address this