diff options
author | orivej <orivej@yandex-team.ru> | 2022-02-10 16:44:49 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:49 +0300 |
commit | 718c552901d703c502ccbefdfc3c9028d608b947 (patch) | |
tree | 46534a98bbefcd7b1f3faa5b52c138ab27db75b7 /contrib/libs/zstd/lib/dictBuilder | |
parent | e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (diff) | |
download | ydb-718c552901d703c502ccbefdfc3c9028d608b947.tar.gz |
Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/zstd/lib/dictBuilder')
-rw-r--r-- | contrib/libs/zstd/lib/dictBuilder/cover.c | 298 | ||||
-rw-r--r-- | contrib/libs/zstd/lib/dictBuilder/cover.h | 100 | ||||
-rw-r--r-- | contrib/libs/zstd/lib/dictBuilder/divsufsort.h | 10 | ||||
-rw-r--r-- | contrib/libs/zstd/lib/dictBuilder/fastcover.c | 116 | ||||
-rw-r--r-- | contrib/libs/zstd/lib/dictBuilder/zdict.c | 208 |
5 files changed, 366 insertions, 366 deletions
diff --git a/contrib/libs/zstd/lib/dictBuilder/cover.c b/contrib/libs/zstd/lib/dictBuilder/cover.c index 028802a1b0..0ae26febe8 100644 --- a/contrib/libs/zstd/lib/dictBuilder/cover.c +++ b/contrib/libs/zstd/lib/dictBuilder/cover.c @@ -30,9 +30,9 @@ # define ZDICT_STATIC_LINKING_ONLY #endif -#include "../common/mem.h" /* read */ -#include "../common/pool.h" -#include "../common/threading.h" +#include "../common/mem.h" /* read */ +#include "../common/pool.h" +#include "../common/threading.h" #include "../common/zstd_internal.h" /* includes zstd.h */ #include "../zdict.h" #include "cover.h" @@ -543,10 +543,10 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) { * Prepare a context for dictionary building. * The context is only dependent on the parameter `d` and can used multiple * times. - * Returns 0 on success or error code on error. + * Returns 0 on success or error code on error. * The context must be destroyed with `COVER_ctx_destroy()`. */ -static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, +static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, unsigned d, double splitPoint) { const BYTE *const samples = (const BYTE *)samplesBuffer; @@ -561,17 +561,17 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); - return ERROR(srcSize_wrong); + return ERROR(srcSize_wrong); } /* Check if there are at least 5 training samples */ if (nbTrainSamples < 5) { DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); - return ERROR(srcSize_wrong); + return ERROR(srcSize_wrong); } /* Check if there's testing sample */ if (nbTestSamples < 1) { DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); - 
return ERROR(srcSize_wrong); + return ERROR(srcSize_wrong); } /* Zero the context */ memset(ctx, 0, sizeof(*ctx)); @@ -594,7 +594,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); COVER_ctx_destroy(ctx); - return ERROR(memory_allocation); + return ERROR(memory_allocation); } ctx->freqs = NULL; ctx->d = d; @@ -641,7 +641,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group); ctx->freqs = ctx->suffix; ctx->suffix = NULL; - return 0; + return 0; } void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel) @@ -746,11 +746,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( /* Checks */ if (!COVER_checkParameters(parameters, dictBufferCapacity)) { DISPLAYLEVEL(1, "Cover parameters incorrect\n"); - return ERROR(parameter_outOfBound); + return ERROR(parameter_outOfBound); } if (nbSamples == 0) { DISPLAYLEVEL(1, "Cover must have at least one input file\n"); - return ERROR(srcSize_wrong); + return ERROR(srcSize_wrong); } if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", @@ -758,18 +758,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( return ERROR(dstSize_tooSmall); } /* Initialize context and activeDmers */ - { - size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, - parameters.d, parameters.splitPoint); - if (ZSTD_isError(initVal)) { - return initVal; - } + { + size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + parameters.d, parameters.splitPoint); + if (ZSTD_isError(initVal)) { + return initVal; + } } COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel); if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { DISPLAYLEVEL(1, "Failed to allocate dmer map: out of 
memory\n"); COVER_ctx_destroy(&ctx); - return ERROR(memory_allocation); + return ERROR(memory_allocation); } DISPLAYLEVEL(2, "Building dictionary\n"); @@ -830,7 +830,7 @@ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, cctx, dst, dstCapacity, samples + offsets[i], samplesSizes[i], cdict); if (ZSTD_isError(size)) { - totalCompressedSize = size; + totalCompressedSize = size; goto _compressCleanup; } totalCompressedSize += size; @@ -906,11 +906,11 @@ void COVER_best_start(COVER_best_t *best) { * Decrements liveJobs and signals any waiting threads if liveJobs == 0. * If this dictionary is the best so far save it and its parameters. */ -void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, - COVER_dictSelection_t selection) { - void* dict = selection.dictContent; - size_t compressedSize = selection.totalCompressedSize; - size_t dictSize = selection.dictSize; +void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, + COVER_dictSelection_t selection) { + void* dict = selection.dictContent; + size_t compressedSize = selection.totalCompressedSize; + size_t dictSize = selection.dictSize; if (!best) { return; } @@ -941,7 +941,7 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, best->dictSize = dictSize; best->parameters = parameters; best->compressedSize = compressedSize; - } + } } if (liveJobs == 0) { ZSTD_pthread_cond_broadcast(&best->cond); @@ -950,111 +950,111 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, } } -COVER_dictSelection_t COVER_dictSelectionError(size_t error) { - COVER_dictSelection_t selection = { NULL, 0, error }; - return selection; -} - -unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) { - return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent); -} - -void COVER_dictSelectionFree(COVER_dictSelection_t selection){ - free(selection.dictContent); -} - +COVER_dictSelection_t 
COVER_dictSelectionError(size_t error) { + COVER_dictSelection_t selection = { NULL, 0, error }; + return selection; +} + +unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) { + return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent); +} + +void COVER_dictSelectionFree(COVER_dictSelection_t selection){ + free(selection.dictContent); +} + COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity, - size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, - size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) { - - size_t largestDict = 0; - size_t largestCompressed = 0; - BYTE* customDictContentEnd = customDictContent + dictContentSize; - + size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, + size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) { + + size_t largestDict = 0; + size_t largestCompressed = 0; + BYTE* customDictContentEnd = customDictContent + dictContentSize; + BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity); BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity); - double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00; - - if (!largestDictbuffer || !candidateDictBuffer) { - free(largestDictbuffer); - free(candidateDictBuffer); - return COVER_dictSelectionError(dictContentSize); - } - - /* Initial dictionary size and compressed size */ - memcpy(largestDictbuffer, customDictContent, dictContentSize); - dictContentSize = ZDICT_finalizeDictionary( + double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00; + + if (!largestDictbuffer || !candidateDictBuffer) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + } + + /* 
Initial dictionary size and compressed size */ + memcpy(largestDictbuffer, customDictContent, dictContentSize); + dictContentSize = ZDICT_finalizeDictionary( largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize, - samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); - - if (ZDICT_isError(dictContentSize)) { - free(largestDictbuffer); - free(candidateDictBuffer); - return COVER_dictSelectionError(dictContentSize); - } - - totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, - samplesBuffer, offsets, - nbCheckSamples, nbSamples, - largestDictbuffer, dictContentSize); - - if (ZSTD_isError(totalCompressedSize)) { - free(largestDictbuffer); - free(candidateDictBuffer); - return COVER_dictSelectionError(totalCompressedSize); - } - - if (params.shrinkDict == 0) { - COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; - free(candidateDictBuffer); - return selection; - } - - largestDict = dictContentSize; - largestCompressed = totalCompressedSize; - dictContentSize = ZDICT_DICTSIZE_MIN; - - /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */ - while (dictContentSize < largestDict) { - memcpy(candidateDictBuffer, largestDictbuffer, largestDict); - dictContentSize = ZDICT_finalizeDictionary( + samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); + + if (ZDICT_isError(dictContentSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + } + + totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, + samplesBuffer, offsets, + nbCheckSamples, nbSamples, + largestDictbuffer, dictContentSize); + + if (ZSTD_isError(totalCompressedSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(totalCompressedSize); + } + + if (params.shrinkDict == 0) { + COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; + 
free(candidateDictBuffer); + return selection; + } + + largestDict = dictContentSize; + largestCompressed = totalCompressedSize; + dictContentSize = ZDICT_DICTSIZE_MIN; + + /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */ + while (dictContentSize < largestDict) { + memcpy(candidateDictBuffer, largestDictbuffer, largestDict); + dictContentSize = ZDICT_finalizeDictionary( candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize, - samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); - - if (ZDICT_isError(dictContentSize)) { - free(largestDictbuffer); - free(candidateDictBuffer); - return COVER_dictSelectionError(dictContentSize); - - } - - totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, - samplesBuffer, offsets, - nbCheckSamples, nbSamples, - candidateDictBuffer, dictContentSize); - - if (ZSTD_isError(totalCompressedSize)) { - free(largestDictbuffer); - free(candidateDictBuffer); - return COVER_dictSelectionError(totalCompressedSize); - } - - if (totalCompressedSize <= largestCompressed * regressionTolerance) { - COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize }; - free(largestDictbuffer); - return selection; - } - dictContentSize *= 2; - } - dictContentSize = largestDict; - totalCompressedSize = largestCompressed; - { - COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; - free(candidateDictBuffer); - return selection; - } -} - + samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); + + if (ZDICT_isError(dictContentSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + + } + + totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, + samplesBuffer, offsets, + nbCheckSamples, nbSamples, + candidateDictBuffer, dictContentSize); + + if (ZSTD_isError(totalCompressedSize)) { + free(largestDictbuffer); + 
free(candidateDictBuffer); + return COVER_dictSelectionError(totalCompressedSize); + } + + if (totalCompressedSize <= largestCompressed * regressionTolerance) { + COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize }; + free(largestDictbuffer); + return selection; + } + dictContentSize *= 2; + } + dictContentSize = largestDict; + totalCompressedSize = largestCompressed; + { + COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; + free(candidateDictBuffer); + return selection; + } +} + /** * Parameters for COVER_tryParameters(). */ @@ -1081,7 +1081,7 @@ static void COVER_tryParameters(void *opaque) /* Allocate space for hash table, dict, and freqs */ COVER_map_t activeDmers; BYTE* const dict = (BYTE*)malloc(dictBufferCapacity); - COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); + COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32)); if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); @@ -1098,20 +1098,20 @@ static void COVER_tryParameters(void *opaque) const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, dictBufferCapacity, parameters); selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail, - ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, - totalCompressedSize); - - if (COVER_dictSelectionIsError(selection)) { - DISPLAYLEVEL(1, "Failed to select dictionary\n"); + ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, + totalCompressedSize); + + if (COVER_dictSelectionIsError(selection)) { + DISPLAYLEVEL(1, "Failed to select dictionary\n"); goto _cleanup; } } _cleanup: - free(dict); - 
COVER_best_finish(data->best, parameters, selection); + free(dict); + COVER_best_finish(data->best, parameters, selection); free(data); COVER_map_destroy(&activeDmers); - COVER_dictSelectionFree(selection); + COVER_dictSelectionFree(selection); free(freqs); } @@ -1132,7 +1132,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); const unsigned kIterations = (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); - const unsigned shrinkDict = 0; + const unsigned shrinkDict = 0; /* Local variables */ const int displayLevel = parameters->zParams.notificationLevel; unsigned iteration = 1; @@ -1145,15 +1145,15 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( /* Checks */ if (splitPoint <= 0 || splitPoint > 1) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); - return ERROR(parameter_outOfBound); + return ERROR(parameter_outOfBound); } if (kMinK < kMaxD || kMaxK < kMinK) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); - return ERROR(parameter_outOfBound); + return ERROR(parameter_outOfBound); } if (nbSamples == 0) { DISPLAYLEVEL(1, "Cover must have at least one input file\n"); - return ERROR(srcSize_wrong); + return ERROR(srcSize_wrong); } if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", @@ -1177,14 +1177,14 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( /* Initialize the context for this value of d */ COVER_ctx_t ctx; LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); - { - const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint); - if (ZSTD_isError(initVal)) { - LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); - COVER_best_destroy(&best); - POOL_free(pool); - return initVal; - } + { + const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint); + if (ZSTD_isError(initVal)) { + 
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } } if (!warned) { COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel); @@ -1201,7 +1201,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( COVER_best_destroy(&best); COVER_ctx_destroy(&ctx); POOL_free(pool); - return ERROR(memory_allocation); + return ERROR(memory_allocation); } data->ctx = &ctx; data->best = &best; @@ -1211,7 +1211,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( data->parameters.d = d; data->parameters.splitPoint = splitPoint; data->parameters.steps = kSteps; - data->parameters.shrinkDict = shrinkDict; + data->parameters.shrinkDict = shrinkDict; data->parameters.zParams.notificationLevel = g_displayLevel; /* Check the parameters */ if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) { diff --git a/contrib/libs/zstd/lib/dictBuilder/cover.h b/contrib/libs/zstd/lib/dictBuilder/cover.h index 1aacdddd6f..44e47d1328 100644 --- a/contrib/libs/zstd/lib/dictBuilder/cover.h +++ b/contrib/libs/zstd/lib/dictBuilder/cover.h @@ -1,13 +1,13 @@ -/* +/* * Copyright (c) Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + #ifndef ZDICT_STATIC_LINKING_ONLY # define ZDICT_STATIC_LINKING_ONLY #endif @@ -16,10 +16,10 @@ #include <stdlib.h> /* malloc, free, qsort */ #include <string.h> /* memset */ #include <time.h> /* clock */ -#include "../common/mem.h" /* read */ -#include "../common/pool.h" -#include "../common/threading.h" -#include "../common/zstd_internal.h" /* includes zstd.h */ +#include "../common/mem.h" /* read */ +#include "../common/pool.h" +#include "../common/threading.h" +#include "../common/zstd_internal.h" /* includes zstd.h */ #include "../zdict.h" /** @@ -58,15 +58,15 @@ typedef struct { } COVER_epoch_info_t; /** - * Struct used for the dictionary selection function. - */ -typedef struct COVER_dictSelection { - BYTE* dictContent; - size_t dictSize; - size_t totalCompressedSize; -} COVER_dictSelection_t; - -/** + * Struct used for the dictionary selection function. + */ +typedef struct COVER_dictSelection { + BYTE* dictContent; + size_t dictSize; + size_t totalCompressedSize; +} COVER_dictSelection_t; + +/** * Computes the number of epochs and the size of each epoch. * We will make sure that each epoch gets at least 10 * k bytes. * @@ -127,32 +127,32 @@ void COVER_best_start(COVER_best_t *best); * Decrements liveJobs and signals any waiting threads if liveJobs == 0. * If this dictionary is the best so far save it and its parameters. */ -void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, - COVER_dictSelection_t selection); -/** - * Error function for COVER_selectDict function. Checks if the return - * value is an error. - */ -unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection); - - /** - * Error function for COVER_selectDict function. Returns a struct where - * return.totalCompressedSize is a ZSTD error. - */ -COVER_dictSelection_t COVER_dictSelectionError(size_t error); - -/** - * Always call after selectDict is called to free up used memory from - * newly created dictionary. 
- */ -void COVER_dictSelectionFree(COVER_dictSelection_t selection); - -/** - * Called to finalize the dictionary and select one based on whether or not - * the shrink-dict flag was enabled. If enabled the dictionary used is the - * smallest dictionary within a specified regression of the compressed size - * from the largest dictionary. - */ +void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, + COVER_dictSelection_t selection); +/** + * Error function for COVER_selectDict function. Checks if the return + * value is an error. + */ +unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection); + + /** + * Error function for COVER_selectDict function. Returns a struct where + * return.totalCompressedSize is a ZSTD error. + */ +COVER_dictSelection_t COVER_dictSelectionError(size_t error); + +/** + * Always call after selectDict is called to free up used memory from + * newly created dictionary. + */ +void COVER_dictSelectionFree(COVER_dictSelection_t selection); + +/** + * Called to finalize the dictionary and select one based on whether or not + * the shrink-dict flag was enabled. If enabled the dictionary used is the + * smallest dictionary within a specified regression of the compressed size + * from the largest dictionary. 
+ */ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity, - size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, - size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize); + size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, + size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize); diff --git a/contrib/libs/zstd/lib/dictBuilder/divsufsort.h b/contrib/libs/zstd/lib/dictBuilder/divsufsort.h index 5440994af1..d387c944f0 100644 --- a/contrib/libs/zstd/lib/dictBuilder/divsufsort.h +++ b/contrib/libs/zstd/lib/dictBuilder/divsufsort.h @@ -36,8 +36,8 @@ extern "C" { /** * Constructs the suffix array of a given string. - * @param T [0..n-1] The input string. - * @param SA [0..n-1] The output array of suffixes. + * @param T [0..n-1] The input string. + * @param SA [0..n-1] The output array of suffixes. * @param n The length of the given string. * @param openMP enables OpenMP optimization. * @return 0 if no error occurred, -1 or -2 otherwise. @@ -47,9 +47,9 @@ divsufsort(const unsigned char *T, int *SA, int n, int openMP); /** * Constructs the burrows-wheeler transformed string of a given string. - * @param T [0..n-1] The input string. - * @param U [0..n-1] The output string. (can be T) - * @param A [0..n-1] The temporary array. (can be NULL) + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string. (can be T) + * @param A [0..n-1] The temporary array. (can be NULL) * @param n The length of the given string. * @param num_indexes The length of secondary indexes array. (can be NULL) * @param indexes The secondary indexes array. 
(can be NULL) diff --git a/contrib/libs/zstd/lib/dictBuilder/fastcover.c b/contrib/libs/zstd/lib/dictBuilder/fastcover.c index 3352859ada..7f21ecfdc3 100644 --- a/contrib/libs/zstd/lib/dictBuilder/fastcover.c +++ b/contrib/libs/zstd/lib/dictBuilder/fastcover.c @@ -1,13 +1,13 @@ -/* +/* * Copyright (c) Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + /*-************************************* * Dependencies ***************************************/ @@ -20,10 +20,10 @@ # define ZDICT_STATIC_LINKING_ONLY #endif -#include "../common/mem.h" /* read */ -#include "../common/pool.h" -#include "../common/threading.h" -#include "../common/zstd_internal.h" /* includes zstd.h */ +#include "../common/mem.h" /* read */ +#include "../common/pool.h" +#include "../common/threading.h" +#include "../common/zstd_internal.h" /* includes zstd.h */ #include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */ #include "../zdict.h" #include "cover.h" @@ -306,10 +306,10 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx) * Prepare a context for dictionary building. * The context is only dependent on the parameter `d` and can used multiple * times. - * Returns 0 on success or error code on error. + * Returns 0 on success or error code on error. * The context must be destroyed with `FASTCOVER_ctx_destroy()`. 
*/ -static size_t +static size_t FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, @@ -329,19 +329,19 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) { DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20)); - return ERROR(srcSize_wrong); + return ERROR(srcSize_wrong); } /* Check if there are at least 5 training samples */ if (nbTrainSamples < 5) { DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples); - return ERROR(srcSize_wrong); + return ERROR(srcSize_wrong); } /* Check if there's testing sample */ if (nbTestSamples < 1) { DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples); - return ERROR(srcSize_wrong); + return ERROR(srcSize_wrong); } /* Zero the context */ @@ -366,7 +366,7 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, if (ctx->offsets == NULL) { DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n"); FASTCOVER_ctx_destroy(ctx); - return ERROR(memory_allocation); + return ERROR(memory_allocation); } /* Fill offsets from the samplesSizes */ @@ -383,13 +383,13 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, if (ctx->freqs == NULL) { DISPLAYLEVEL(1, "Failed to allocate frequency table \n"); FASTCOVER_ctx_destroy(ctx); - return ERROR(memory_allocation); + return ERROR(memory_allocation); } DISPLAYLEVEL(2, "Computing frequencies\n"); FASTCOVER_computeFrequency(ctx->freqs, ctx); - return 0; + return 0; } @@ -482,7 +482,7 @@ static void FASTCOVER_tryParameters(void* opaque) U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16)); /* Allocate space for hash table, dict, and freqs */ BYTE *const dict = (BYTE*)malloc(dictBufferCapacity); - COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); + COVER_dictSelection_t selection = 
COVER_dictSelectionError(ERROR(GENERIC)); U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32)); if (!segmentFreqs || !dict || !freqs) { DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); @@ -492,24 +492,24 @@ static void FASTCOVER_tryParameters(void* opaque) memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32)); /* Build the dictionary */ { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity, - parameters, segmentFreqs); - + parameters, segmentFreqs); + const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100); selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail, - ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, - totalCompressedSize); - - if (COVER_dictSelectionIsError(selection)) { - DISPLAYLEVEL(1, "Failed to select dictionary\n"); + ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, + totalCompressedSize); + + if (COVER_dictSelectionIsError(selection)) { + DISPLAYLEVEL(1, "Failed to select dictionary\n"); goto _cleanup; } } _cleanup: - free(dict); - COVER_best_finish(data->best, parameters, selection); + free(dict); + COVER_best_finish(data->best, parameters, selection); free(data); free(segmentFreqs); - COVER_dictSelectionFree(selection); + COVER_dictSelectionFree(selection); free(freqs); } @@ -524,7 +524,7 @@ FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams, coverParams->nbThreads = fastCoverParams.nbThreads; coverParams->splitPoint = fastCoverParams.splitPoint; coverParams->zParams = fastCoverParams.zParams; - coverParams->shrinkDict = fastCoverParams.shrinkDict; + coverParams->shrinkDict = fastCoverParams.shrinkDict; } @@ -541,7 +541,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams, fastCoverParams->f = f; fastCoverParams->accel = accel; 
fastCoverParams->zParams = coverParams.zParams; - fastCoverParams->shrinkDict = coverParams.shrinkDict; + fastCoverParams->shrinkDict = coverParams.shrinkDict; } @@ -568,11 +568,11 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f, parameters.accel)) { DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); - return ERROR(parameter_outOfBound); + return ERROR(parameter_outOfBound); } if (nbSamples == 0) { DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n"); - return ERROR(srcSize_wrong); + return ERROR(srcSize_wrong); } if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", @@ -582,14 +582,14 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, /* Assign corresponding FASTCOVER_accel_t to accelParams*/ accelParams = FASTCOVER_defaultAccelParameters[parameters.accel]; /* Initialize context */ - { - size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, coverParams.d, parameters.splitPoint, parameters.f, - accelParams); - if (ZSTD_isError(initVal)) { - DISPLAYLEVEL(1, "Failed to initialize context\n"); - return initVal; - } + accelParams); + if (ZSTD_isError(initVal)) { + DISPLAYLEVEL(1, "Failed to initialize context\n"); + return initVal; + } } COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel); /* Build the dictionary */ @@ -637,7 +637,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover( (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f; const unsigned accel = parameters->accel == 0 ? 
DEFAULT_ACCEL : parameters->accel; - const unsigned shrinkDict = 0; + const unsigned shrinkDict = 0; /* Local variables */ const int displayLevel = (int)parameters->zParams.notificationLevel; unsigned iteration = 1; @@ -649,19 +649,19 @@ ZDICT_optimizeTrainFromBuffer_fastCover( /* Checks */ if (splitPoint <= 0 || splitPoint > 1) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n"); - return ERROR(parameter_outOfBound); + return ERROR(parameter_outOfBound); } if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n"); - return ERROR(parameter_outOfBound); + return ERROR(parameter_outOfBound); } if (kMinK < kMaxD || kMaxK < kMinK) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n"); - return ERROR(parameter_outOfBound); + return ERROR(parameter_outOfBound); } if (nbSamples == 0) { LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n"); - return ERROR(srcSize_wrong); + return ERROR(srcSize_wrong); } if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n", @@ -688,14 +688,14 @@ ZDICT_optimizeTrainFromBuffer_fastCover( /* Initialize the context for this value of d */ FASTCOVER_ctx_t ctx; LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); - { - size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams); - if (ZSTD_isError(initVal)) { - LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); - COVER_best_destroy(&best); - POOL_free(pool); - return initVal; - } + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams); + if (ZSTD_isError(initVal)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } } if (!warned) { COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel); @@ 
-712,7 +712,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover( COVER_best_destroy(&best); FASTCOVER_ctx_destroy(&ctx); POOL_free(pool); - return ERROR(memory_allocation); + return ERROR(memory_allocation); } data->ctx = &ctx; data->best = &best; @@ -722,7 +722,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover( data->parameters.d = d; data->parameters.splitPoint = splitPoint; data->parameters.steps = kSteps; - data->parameters.shrinkDict = shrinkDict; + data->parameters.shrinkDict = shrinkDict; data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel; /* Check the parameters */ if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity, diff --git a/contrib/libs/zstd/lib/dictBuilder/zdict.c b/contrib/libs/zstd/lib/dictBuilder/zdict.c index 587df6b861..9ae0608c0c 100644 --- a/contrib/libs/zstd/lib/dictBuilder/zdict.c +++ b/contrib/libs/zstd/lib/dictBuilder/zdict.c @@ -1,12 +1,12 @@ /* * Copyright (c) Yann Collet, Facebook, Inc. - * All rights reserved. - * + * All rights reserved. + * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. 
- */ + */ /*-************************************** @@ -46,11 +46,11 @@ #endif #define HUF_STATIC_LINKING_ONLY -#include "../common/mem.h" /* read */ -#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */ -#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */ -#include "../common/zstd_internal.h" /* includes zstd.h */ -#include <contrib/libs/xxhash/xxhash.h> /* XXH64 */ +#include "../common/mem.h" /* read */ +#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */ +#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */ +#include "../common/zstd_internal.h" /* includes zstd.h */ +#include <contrib/libs/xxhash/xxhash.h> /* XXH64 */ #include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */ #include "../zdict.h" #include "divsufsort.h" @@ -63,7 +63,7 @@ #define MB *(1 <<20) #define GB *(1U<<30) -#define DICTLISTSIZE_DEFAULT 10000 +#define DICTLISTSIZE_DEFAULT 10000 #define NOISELENGTH 32 @@ -76,18 +76,18 @@ static const U32 g_selectivity_default = 9; #undef DISPLAY #define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } #undef DISPLAYLEVEL -#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ +#define DISPLAYLEVEL(l, ...) 
if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; } -static void ZDICT_printHex(const void* ptr, size_t length) +static void ZDICT_printHex(const void* ptr, size_t length) { const BYTE* const b = (const BYTE*)ptr; size_t u; for (u=0; u<length; u++) { BYTE c = b[u]; if (c<32 || c>126) c = '.'; /* non-printable char */ - DISPLAY("%c", c); + DISPLAY("%c", c); } } @@ -99,34 +99,34 @@ unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); } const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } -unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize) -{ - if (dictSize < 8) return 0; +unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize) +{ + if (dictSize < 8) return 0; if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0; - return MEM_readLE32((const char*)dictBuffer + 4); -} - -size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) -{ - size_t headerSize; - if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted); - + return MEM_readLE32((const char*)dictBuffer + 4); +} + +size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize) +{ + size_t headerSize; + if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted); + { ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t)); - U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE); + U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE); if (!bs || !wksp) { - headerSize = ERROR(memory_allocation); - } else { - ZSTD_reset_compressedBlockState(bs); + headerSize = ERROR(memory_allocation); + } else { + ZSTD_reset_compressedBlockState(bs); headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize); - } - - free(bs); - free(wksp); - 
} - - return headerSize; -} - + } + + free(bs); + free(wksp); + } + + return headerSize; +} + /*-******************************************************** * Dictionary training functions **********************************************************/ @@ -248,7 +248,7 @@ static void ZDICT_initDictItem(dictItem* d) static dictItem ZDICT_analyzePos( BYTE* doneMarks, const int* suffix, U32 start, - const void* buffer, U32 minRatio, U32 notificationLevel) + const void* buffer, U32 minRatio, U32 notificationLevel) { U32 lengthList[LLIMIT] = {0}; U32 cumulLength[LLIMIT] = {0}; @@ -435,23 +435,23 @@ static int isIncluded(const void* in, const void* container, size_t length) static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const void* buffer) { const U32 tableSize = table->pos; - const U32 eltEnd = elt.pos + elt.length; + const U32 eltEnd = elt.pos + elt.length; const char* const buf = (const char*) buffer; /* tail overlap */ U32 u; for (u=1; u<tableSize; u++) { if (u==eltNbToSkip) continue; - if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */ + if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */ /* append */ U32 const addedLength = table[u].pos - elt.pos; table[u].length += addedLength; table[u].pos = elt.pos; table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ - table[u].savings += elt.length / 8; /* rough approx bonus */ + table[u].savings += elt.length / 8; /* rough approx bonus */ elt = table[u]; - /* sort : improve rank */ + /* sort : improve rank */ while ((u>1) && (table[u-1].savings < elt.savings)) - table[u] = table[u-1], u--; + table[u] = table[u-1], u--; table[u] = elt; return u; } } @@ -460,15 +460,15 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const for (u=1; u<tableSize; u++) { if (u==eltNbToSkip) continue; - if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, 
existing < new */ + if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */ /* append */ int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length); - table[u].savings += elt.length / 8; /* rough approx bonus */ - if (addedLength > 0) { /* otherwise, elt fully included into existing */ + table[u].savings += elt.length / 8; /* rough approx bonus */ + if (addedLength > 0) { /* otherwise, elt fully included into existing */ table[u].length += addedLength; table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ } - /* sort : improve rank */ + /* sort : improve rank */ elt = table[u]; while ((u>1) && (table[u-1].savings < elt.savings)) table[u] = table[u-1], u--; @@ -552,15 +552,15 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize, BYTE* doneMarks = (BYTE*)malloc((bufferSize+16)*sizeof(*doneMarks)); /* +16 for overflow security */ U32* filePos = (U32*)malloc(nbFiles * sizeof(*filePos)); size_t result = 0; - clock_t displayClock = 0; - clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10; + clock_t displayClock = 0; + clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10; # undef DISPLAYUPDATE -# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \ - if (ZDICT_clockSpan(displayClock) > refreshRate) \ - { displayClock = clock(); DISPLAY(__VA_ARGS__); \ +# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \ + if (ZDICT_clockSpan(displayClock) > refreshRate) \ + { displayClock = clock(); DISPLAY(__VA_ARGS__); \ if (notificationLevel>=4) fflush(stderr); } } - + /* init */ DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) { @@ -585,8 +585,8 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize, { size_t pos; for (pos=0; pos < bufferSize; pos++) reverseSuffix[suffix[pos]] = (U32)pos; - /* note filePos tracks borders between samples. 
- It's not used at this stage, but planned to become useful in a later update */ + /* note filePos tracks borders between samples. + It's not used at this stage, but planned to become useful in a later update */ filePos[0] = 0; for (pos=1; pos<nbFiles; pos++) filePos[pos] = (U32)(filePos[pos-1] + fileSizes[pos-1]); @@ -598,7 +598,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize, { U32 cursor; for (cursor=0; cursor < bufferSize; ) { dictItem solution; if (doneMarks[cursor]) { cursor++; continue; } - solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel); + solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel); if (solution.length==0) { cursor++; continue; } ZDICT_insertDictItem(dictList, dictListSize, solution, buffer); cursor += solution.length; @@ -616,12 +616,12 @@ _cleanup: static void ZDICT_fillNoise(void* buffer, size_t length) { - unsigned const prime1 = 2654435761U; - unsigned const prime2 = 2246822519U; - unsigned acc = prime1; + unsigned const prime1 = 2654435761U; + unsigned const prime2 = 2246822519U; + unsigned acc = prime1; size_t p=0; for (p=0; p<length; p++) { - acc *= prime2; + acc *= prime2; ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21); } } @@ -636,19 +636,19 @@ typedef struct #define MAXREPOFFSET 1024 -static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params, +static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params, unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets, const void* src, size_t srcSize, U32 notificationLevel) { - size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog); + size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog); size_t cSize; if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples 
*/ { size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict); if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; } - } + } cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize); if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; } @@ -731,9 +731,9 @@ static void ZDICT_flatLit(unsigned* countLit) #define OFFCODE_MAX 30 /* only applicable to first block */ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, int compressionLevel, - const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles, - const void* dictBuffer, size_t dictBufferSize, - unsigned notificationLevel) + const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles, + const void* dictBuffer, size_t dictBufferSize, + unsigned notificationLevel) { unsigned countLit[256]; HUF_CREATE_STATIC_CTABLE(hufTable, 255); @@ -752,7 +752,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, size_t pos = 0, errorCode; size_t eSize = 0; size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles); - size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles); + size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles); BYTE* dstPtr = (BYTE*)dstBuffer; /* init */ @@ -779,10 +779,10 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, /* collect stats on all samples */ for (u=0; u<nbFiles; u++) { - ZDICT_countEStats(esr, &params, - countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset, - (const char*)srcBuffer + pos, fileSizes[u], - notificationLevel); + ZDICT_countEStats(esr, &params, + countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset, + (const char*)srcBuffer + pos, fileSizes[u], + notificationLevel); pos += fileSizes[u]; } @@ -796,7 +796,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, /* 
analyze, build stats, starting with literals */ { size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog); if (HUF_isError(maxNbBits)) { - eSize = maxNbBits; + eSize = maxNbBits; DISPLAYLEVEL(1, " HUF_buildCTable error \n"); goto _cleanup; } @@ -819,8 +819,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u]; errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, /* useLowProbCount */ 1); if (FSE_isError(errorCode)) { - eSize = errorCode; - DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n"); + eSize = errorCode; + DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n"); goto _cleanup; } Offlog = (U32)errorCode; @@ -828,8 +828,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u]; errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML, /* useLowProbCount */ 1); if (FSE_isError(errorCode)) { - eSize = errorCode; - DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n"); + eSize = errorCode; + DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n"); goto _cleanup; } mlLog = (U32)errorCode; @@ -837,8 +837,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u]; errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL, /* useLowProbCount */ 1); if (FSE_isError(errorCode)) { - eSize = errorCode; - DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n"); + eSize = errorCode; + DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n"); goto _cleanup; } llLog = (U32)errorCode; @@ -846,8 +846,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, /* write result to buffer */ { size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, 
hufTable, 255, huffLog); if (HUF_isError(hhSize)) { - eSize = hhSize; - DISPLAYLEVEL(1, "HUF_writeCTable error \n"); + eSize = hhSize; + DISPLAYLEVEL(1, "HUF_writeCTable error \n"); goto _cleanup; } dstPtr += hhSize; @@ -857,8 +857,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, { size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog); if (FSE_isError(ohSize)) { - eSize = ohSize; - DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n"); + eSize = ohSize; + DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n"); goto _cleanup; } dstPtr += ohSize; @@ -868,8 +868,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, { size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog); if (FSE_isError(mhSize)) { - eSize = mhSize; - DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n"); + eSize = mhSize; + DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n"); goto _cleanup; } dstPtr += mhSize; @@ -879,8 +879,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, { size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog); if (FSE_isError(lhSize)) { - eSize = lhSize; - DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n"); + eSize = lhSize; + DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n"); goto _cleanup; } dstPtr += lhSize; @@ -889,8 +889,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, } if (maxDstSize<12) { - eSize = ERROR(dstSize_tooSmall); - DISPLAYLEVEL(1, "not enough space to write RepOffsets \n"); + eSize = ERROR(dstSize_tooSmall); + DISPLAYLEVEL(1, "not enough space to write RepOffsets \n"); goto _cleanup; } # if 0 @@ -936,7 +936,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, #define HBUFFSIZE 256 /* should prove large enough for all entropy headers */ BYTE 
header[HBUFFSIZE]; int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel; - U32 const notificationLevel = params.notificationLevel; + U32 const notificationLevel = params.notificationLevel; /* The final dictionary content must be at least as large as the largest repcode */ size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue); size_t paddingSize; @@ -1021,14 +1021,14 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced( /* calculate entropy tables */ DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ DISPLAYLEVEL(2, "statistics ... \n"); - { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize, + { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize, compressionLevel, samplesBuffer, samplesSizes, nbSamples, - (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, - notificationLevel); - if (ZDICT_isError(eSize)) return eSize; - hSize += eSize; - } + (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, + notificationLevel); + if (ZDICT_isError(eSize)) return eSize; + hSize += eSize; + } /* add dictionary header (after entropy tables) */ MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY); @@ -1037,7 +1037,7 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced( U32 const dictID = params.dictID ? 
params.dictID : compliantID; MEM_writeLE32((char*)dictBuffer+4, dictID); } - + if (hSize + dictContentSize < dictBufferCapacity) memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize); return MIN(dictBufferCapacity, hSize+dictContentSize); @@ -1052,7 +1052,7 @@ static size_t ZDICT_trainFromBuffer_unsafe_legacy( const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t params) { - U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16)); + U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16)); dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList)); unsigned const selectivity = params.selectivityLevel == 0 ? g_selectivity_default : params.selectivityLevel; unsigned const minRep = (selectivity > 30) ? MINRATIO : nbSamples >> selectivity; @@ -1081,18 +1081,18 @@ static size_t ZDICT_trainFromBuffer_unsafe_legacy( unsigned const dictContentSize = ZDICT_dictSize(dictList); unsigned u; DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", (unsigned)dictList[0].pos-1, dictContentSize); - DISPLAYLEVEL(3, "list %u best segments \n", nb-1); - for (u=1; u<nb; u++) { + DISPLAYLEVEL(3, "list %u best segments \n", nb-1); + for (u=1; u<nb; u++) { unsigned const pos = dictList[u].pos; unsigned const length = dictList[u].length; - U32 const printedLength = MIN(40, length); + U32 const printedLength = MIN(40, length); if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) { free(dictList); - return ERROR(GENERIC); /* should never happen */ + return ERROR(GENERIC); /* should never happen */ } DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |", u, length, pos, (unsigned)dictList[u].savings); - ZDICT_printHex((const char*)samplesBuffer+pos, printedLength); + ZDICT_printHex((const char*)samplesBuffer+pos, printedLength); DISPLAYLEVEL(3, "| \n"); } } @@ -1110,11 +1110,11 @@ 
static size_t ZDICT_trainFromBuffer_unsafe_legacy( } } - if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) { + if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) { unsigned proposedSelectivity = selectivity-1; while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; } DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (unsigned)maxDictSize); - DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity); + DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity); DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n"); } @@ -1160,7 +1160,7 @@ size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity, size_t result; void* newBuff; size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); - if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0; /* not enough content => no dictionary */ + if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0; /* not enough content => no dictionary */ newBuff = malloc(sBuffSize + NOISELENGTH); if (!newBuff) return ERROR(memory_allocation); |