aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/zstd/lib/dictBuilder
diff options
context:
space:
mode:
authororivej <orivej@yandex-team.ru>2022-02-10 16:44:49 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:44:49 +0300
commit718c552901d703c502ccbefdfc3c9028d608b947 (patch)
tree46534a98bbefcd7b1f3faa5b52c138ab27db75b7 /contrib/libs/zstd/lib/dictBuilder
parente9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (diff)
downloadydb-718c552901d703c502ccbefdfc3c9028d608b947.tar.gz
Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/zstd/lib/dictBuilder')
-rw-r--r--contrib/libs/zstd/lib/dictBuilder/cover.c298
-rw-r--r--contrib/libs/zstd/lib/dictBuilder/cover.h100
-rw-r--r--contrib/libs/zstd/lib/dictBuilder/divsufsort.h10
-rw-r--r--contrib/libs/zstd/lib/dictBuilder/fastcover.c116
-rw-r--r--contrib/libs/zstd/lib/dictBuilder/zdict.c208
5 files changed, 366 insertions, 366 deletions
diff --git a/contrib/libs/zstd/lib/dictBuilder/cover.c b/contrib/libs/zstd/lib/dictBuilder/cover.c
index 028802a1b0..0ae26febe8 100644
--- a/contrib/libs/zstd/lib/dictBuilder/cover.c
+++ b/contrib/libs/zstd/lib/dictBuilder/cover.c
@@ -30,9 +30,9 @@
# define ZDICT_STATIC_LINKING_ONLY
#endif
-#include "../common/mem.h" /* read */
-#include "../common/pool.h"
-#include "../common/threading.h"
+#include "../common/mem.h" /* read */
+#include "../common/pool.h"
+#include "../common/threading.h"
#include "../common/zstd_internal.h" /* includes zstd.h */
#include "../zdict.h"
#include "cover.h"
@@ -543,10 +543,10 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
* Prepare a context for dictionary building.
* The context is only dependent on the parameter `d` and can used multiple
* times.
- * Returns 0 on success or error code on error.
+ * Returns 0 on success or error code on error.
* The context must be destroyed with `COVER_ctx_destroy()`.
*/
-static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
+static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples,
unsigned d, double splitPoint) {
const BYTE *const samples = (const BYTE *)samplesBuffer;
@@ -561,17 +561,17 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
(unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
- return ERROR(srcSize_wrong);
+ return ERROR(srcSize_wrong);
}
/* Check if there are at least 5 training samples */
if (nbTrainSamples < 5) {
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
- return ERROR(srcSize_wrong);
+ return ERROR(srcSize_wrong);
}
/* Check if there's testing sample */
if (nbTestSamples < 1) {
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
- return ERROR(srcSize_wrong);
+ return ERROR(srcSize_wrong);
}
/* Zero the context */
memset(ctx, 0, sizeof(*ctx));
@@ -594,7 +594,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
COVER_ctx_destroy(ctx);
- return ERROR(memory_allocation);
+ return ERROR(memory_allocation);
}
ctx->freqs = NULL;
ctx->d = d;
@@ -641,7 +641,7 @@ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
(ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
ctx->freqs = ctx->suffix;
ctx->suffix = NULL;
- return 0;
+ return 0;
}
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
@@ -746,11 +746,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
/* Checks */
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
- return ERROR(parameter_outOfBound);
+ return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
- return ERROR(srcSize_wrong);
+ return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -758,18 +758,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
return ERROR(dstSize_tooSmall);
}
/* Initialize context and activeDmers */
- {
- size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
- parameters.d, parameters.splitPoint);
- if (ZSTD_isError(initVal)) {
- return initVal;
- }
+ {
+ size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+ parameters.d, parameters.splitPoint);
+ if (ZSTD_isError(initVal)) {
+ return initVal;
+ }
}
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
COVER_ctx_destroy(&ctx);
- return ERROR(memory_allocation);
+ return ERROR(memory_allocation);
}
DISPLAYLEVEL(2, "Building dictionary\n");
@@ -830,7 +830,7 @@ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
cctx, dst, dstCapacity, samples + offsets[i],
samplesSizes[i], cdict);
if (ZSTD_isError(size)) {
- totalCompressedSize = size;
+ totalCompressedSize = size;
goto _compressCleanup;
}
totalCompressedSize += size;
@@ -906,11 +906,11 @@ void COVER_best_start(COVER_best_t *best) {
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
* If this dictionary is the best so far save it and its parameters.
*/
-void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
- COVER_dictSelection_t selection) {
- void* dict = selection.dictContent;
- size_t compressedSize = selection.totalCompressedSize;
- size_t dictSize = selection.dictSize;
+void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
+ COVER_dictSelection_t selection) {
+ void* dict = selection.dictContent;
+ size_t compressedSize = selection.totalCompressedSize;
+ size_t dictSize = selection.dictSize;
if (!best) {
return;
}
@@ -941,7 +941,7 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
best->dictSize = dictSize;
best->parameters = parameters;
best->compressedSize = compressedSize;
- }
+ }
}
if (liveJobs == 0) {
ZSTD_pthread_cond_broadcast(&best->cond);
@@ -950,111 +950,111 @@ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
}
}
-COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
- COVER_dictSelection_t selection = { NULL, 0, error };
- return selection;
-}
-
-unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
- return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
-}
-
-void COVER_dictSelectionFree(COVER_dictSelection_t selection){
- free(selection.dictContent);
-}
-
+COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
+ COVER_dictSelection_t selection = { NULL, 0, error };
+ return selection;
+}
+
+unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
+ return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
+}
+
+void COVER_dictSelectionFree(COVER_dictSelection_t selection){
+ free(selection.dictContent);
+}
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
- size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
- size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
-
- size_t largestDict = 0;
- size_t largestCompressed = 0;
- BYTE* customDictContentEnd = customDictContent + dictContentSize;
-
+ size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
+ size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
+
+ size_t largestDict = 0;
+ size_t largestCompressed = 0;
+ BYTE* customDictContentEnd = customDictContent + dictContentSize;
+
BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity);
BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity);
- double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
-
- if (!largestDictbuffer || !candidateDictBuffer) {
- free(largestDictbuffer);
- free(candidateDictBuffer);
- return COVER_dictSelectionError(dictContentSize);
- }
-
- /* Initial dictionary size and compressed size */
- memcpy(largestDictbuffer, customDictContent, dictContentSize);
- dictContentSize = ZDICT_finalizeDictionary(
+ double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
+
+ if (!largestDictbuffer || !candidateDictBuffer) {
+ free(largestDictbuffer);
+ free(candidateDictBuffer);
+ return COVER_dictSelectionError(dictContentSize);
+ }
+
+ /* Initial dictionary size and compressed size */
+ memcpy(largestDictbuffer, customDictContent, dictContentSize);
+ dictContentSize = ZDICT_finalizeDictionary(
largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize,
- samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
-
- if (ZDICT_isError(dictContentSize)) {
- free(largestDictbuffer);
- free(candidateDictBuffer);
- return COVER_dictSelectionError(dictContentSize);
- }
-
- totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
- samplesBuffer, offsets,
- nbCheckSamples, nbSamples,
- largestDictbuffer, dictContentSize);
-
- if (ZSTD_isError(totalCompressedSize)) {
- free(largestDictbuffer);
- free(candidateDictBuffer);
- return COVER_dictSelectionError(totalCompressedSize);
- }
-
- if (params.shrinkDict == 0) {
- COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
- free(candidateDictBuffer);
- return selection;
- }
-
- largestDict = dictContentSize;
- largestCompressed = totalCompressedSize;
- dictContentSize = ZDICT_DICTSIZE_MIN;
-
- /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
- while (dictContentSize < largestDict) {
- memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
- dictContentSize = ZDICT_finalizeDictionary(
+ samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
+
+ if (ZDICT_isError(dictContentSize)) {
+ free(largestDictbuffer);
+ free(candidateDictBuffer);
+ return COVER_dictSelectionError(dictContentSize);
+ }
+
+ totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
+ samplesBuffer, offsets,
+ nbCheckSamples, nbSamples,
+ largestDictbuffer, dictContentSize);
+
+ if (ZSTD_isError(totalCompressedSize)) {
+ free(largestDictbuffer);
+ free(candidateDictBuffer);
+ return COVER_dictSelectionError(totalCompressedSize);
+ }
+
+ if (params.shrinkDict == 0) {
+ COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
+ free(candidateDictBuffer);
+ return selection;
+ }
+
+ largestDict = dictContentSize;
+ largestCompressed = totalCompressedSize;
+ dictContentSize = ZDICT_DICTSIZE_MIN;
+
+ /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
+ while (dictContentSize < largestDict) {
+ memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
+ dictContentSize = ZDICT_finalizeDictionary(
candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize,
- samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
-
- if (ZDICT_isError(dictContentSize)) {
- free(largestDictbuffer);
- free(candidateDictBuffer);
- return COVER_dictSelectionError(dictContentSize);
-
- }
-
- totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
- samplesBuffer, offsets,
- nbCheckSamples, nbSamples,
- candidateDictBuffer, dictContentSize);
-
- if (ZSTD_isError(totalCompressedSize)) {
- free(largestDictbuffer);
- free(candidateDictBuffer);
- return COVER_dictSelectionError(totalCompressedSize);
- }
-
- if (totalCompressedSize <= largestCompressed * regressionTolerance) {
- COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
- free(largestDictbuffer);
- return selection;
- }
- dictContentSize *= 2;
- }
- dictContentSize = largestDict;
- totalCompressedSize = largestCompressed;
- {
- COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
- free(candidateDictBuffer);
- return selection;
- }
-}
-
+ samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
+
+ if (ZDICT_isError(dictContentSize)) {
+ free(largestDictbuffer);
+ free(candidateDictBuffer);
+ return COVER_dictSelectionError(dictContentSize);
+
+ }
+
+ totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
+ samplesBuffer, offsets,
+ nbCheckSamples, nbSamples,
+ candidateDictBuffer, dictContentSize);
+
+ if (ZSTD_isError(totalCompressedSize)) {
+ free(largestDictbuffer);
+ free(candidateDictBuffer);
+ return COVER_dictSelectionError(totalCompressedSize);
+ }
+
+ if (totalCompressedSize <= largestCompressed * regressionTolerance) {
+ COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
+ free(largestDictbuffer);
+ return selection;
+ }
+ dictContentSize *= 2;
+ }
+ dictContentSize = largestDict;
+ totalCompressedSize = largestCompressed;
+ {
+ COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
+ free(candidateDictBuffer);
+ return selection;
+ }
+}
+
/**
* Parameters for COVER_tryParameters().
*/
@@ -1081,7 +1081,7 @@ static void COVER_tryParameters(void *opaque)
/* Allocate space for hash table, dict, and freqs */
COVER_map_t activeDmers;
BYTE* const dict = (BYTE*)malloc(dictBufferCapacity);
- COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
+ COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32));
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
@@ -1098,20 +1098,20 @@ static void COVER_tryParameters(void *opaque)
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
dictBufferCapacity, parameters);
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
- ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
- totalCompressedSize);
-
- if (COVER_dictSelectionIsError(selection)) {
- DISPLAYLEVEL(1, "Failed to select dictionary\n");
+ ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
+ totalCompressedSize);
+
+ if (COVER_dictSelectionIsError(selection)) {
+ DISPLAYLEVEL(1, "Failed to select dictionary\n");
goto _cleanup;
}
}
_cleanup:
- free(dict);
- COVER_best_finish(data->best, parameters, selection);
+ free(dict);
+ COVER_best_finish(data->best, parameters, selection);
free(data);
COVER_map_destroy(&activeDmers);
- COVER_dictSelectionFree(selection);
+ COVER_dictSelectionFree(selection);
free(freqs);
}
@@ -1132,7 +1132,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
const unsigned kIterations =
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
- const unsigned shrinkDict = 0;
+ const unsigned shrinkDict = 0;
/* Local variables */
const int displayLevel = parameters->zParams.notificationLevel;
unsigned iteration = 1;
@@ -1145,15 +1145,15 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
/* Checks */
if (splitPoint <= 0 || splitPoint > 1) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
- return ERROR(parameter_outOfBound);
+ return ERROR(parameter_outOfBound);
}
if (kMinK < kMaxD || kMaxK < kMinK) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
- return ERROR(parameter_outOfBound);
+ return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
- return ERROR(srcSize_wrong);
+ return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -1177,14 +1177,14 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
/* Initialize the context for this value of d */
COVER_ctx_t ctx;
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
- {
- const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
- if (ZSTD_isError(initVal)) {
- LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
- COVER_best_destroy(&best);
- POOL_free(pool);
- return initVal;
- }
+ {
+ const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
+ if (ZSTD_isError(initVal)) {
+ LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+ COVER_best_destroy(&best);
+ POOL_free(pool);
+ return initVal;
+ }
}
if (!warned) {
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
@@ -1201,7 +1201,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
COVER_best_destroy(&best);
COVER_ctx_destroy(&ctx);
POOL_free(pool);
- return ERROR(memory_allocation);
+ return ERROR(memory_allocation);
}
data->ctx = &ctx;
data->best = &best;
@@ -1211,7 +1211,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
data->parameters.d = d;
data->parameters.splitPoint = splitPoint;
data->parameters.steps = kSteps;
- data->parameters.shrinkDict = shrinkDict;
+ data->parameters.shrinkDict = shrinkDict;
data->parameters.zParams.notificationLevel = g_displayLevel;
/* Check the parameters */
if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
diff --git a/contrib/libs/zstd/lib/dictBuilder/cover.h b/contrib/libs/zstd/lib/dictBuilder/cover.h
index 1aacdddd6f..44e47d1328 100644
--- a/contrib/libs/zstd/lib/dictBuilder/cover.h
+++ b/contrib/libs/zstd/lib/dictBuilder/cover.h
@@ -1,13 +1,13 @@
-/*
+/*
* Copyright (c) Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
#ifndef ZDICT_STATIC_LINKING_ONLY
# define ZDICT_STATIC_LINKING_ONLY
#endif
@@ -16,10 +16,10 @@
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memset */
#include <time.h> /* clock */
-#include "../common/mem.h" /* read */
-#include "../common/pool.h"
-#include "../common/threading.h"
-#include "../common/zstd_internal.h" /* includes zstd.h */
+#include "../common/mem.h" /* read */
+#include "../common/pool.h"
+#include "../common/threading.h"
+#include "../common/zstd_internal.h" /* includes zstd.h */
#include "../zdict.h"
/**
@@ -58,15 +58,15 @@ typedef struct {
} COVER_epoch_info_t;
/**
- * Struct used for the dictionary selection function.
- */
-typedef struct COVER_dictSelection {
- BYTE* dictContent;
- size_t dictSize;
- size_t totalCompressedSize;
-} COVER_dictSelection_t;
-
-/**
+ * Struct used for the dictionary selection function.
+ */
+typedef struct COVER_dictSelection {
+ BYTE* dictContent;
+ size_t dictSize;
+ size_t totalCompressedSize;
+} COVER_dictSelection_t;
+
+/**
* Computes the number of epochs and the size of each epoch.
* We will make sure that each epoch gets at least 10 * k bytes.
*
@@ -127,32 +127,32 @@ void COVER_best_start(COVER_best_t *best);
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
* If this dictionary is the best so far save it and its parameters.
*/
-void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
- COVER_dictSelection_t selection);
-/**
- * Error function for COVER_selectDict function. Checks if the return
- * value is an error.
- */
-unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
-
- /**
- * Error function for COVER_selectDict function. Returns a struct where
- * return.totalCompressedSize is a ZSTD error.
- */
-COVER_dictSelection_t COVER_dictSelectionError(size_t error);
-
-/**
- * Always call after selectDict is called to free up used memory from
- * newly created dictionary.
- */
-void COVER_dictSelectionFree(COVER_dictSelection_t selection);
-
-/**
- * Called to finalize the dictionary and select one based on whether or not
- * the shrink-dict flag was enabled. If enabled the dictionary used is the
- * smallest dictionary within a specified regression of the compressed size
- * from the largest dictionary.
- */
+void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
+ COVER_dictSelection_t selection);
+/**
+ * Error function for COVER_selectDict function. Checks if the return
+ * value is an error.
+ */
+unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
+
+ /**
+ * Error function for COVER_selectDict function. Returns a struct where
+ * return.totalCompressedSize is a ZSTD error.
+ */
+COVER_dictSelection_t COVER_dictSelectionError(size_t error);
+
+/**
+ * Always call after selectDict is called to free up used memory from
+ * newly created dictionary.
+ */
+void COVER_dictSelectionFree(COVER_dictSelection_t selection);
+
+/**
+ * Called to finalize the dictionary and select one based on whether or not
+ * the shrink-dict flag was enabled. If enabled the dictionary used is the
+ * smallest dictionary within a specified regression of the compressed size
+ * from the largest dictionary.
+ */
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
- size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
- size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
+ size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
+ size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
diff --git a/contrib/libs/zstd/lib/dictBuilder/divsufsort.h b/contrib/libs/zstd/lib/dictBuilder/divsufsort.h
index 5440994af1..d387c944f0 100644
--- a/contrib/libs/zstd/lib/dictBuilder/divsufsort.h
+++ b/contrib/libs/zstd/lib/dictBuilder/divsufsort.h
@@ -36,8 +36,8 @@ extern "C" {
/**
* Constructs the suffix array of a given string.
- * @param T [0..n-1] The input string.
- * @param SA [0..n-1] The output array of suffixes.
+ * @param T [0..n-1] The input string.
+ * @param SA [0..n-1] The output array of suffixes.
* @param n The length of the given string.
* @param openMP enables OpenMP optimization.
* @return 0 if no error occurred, -1 or -2 otherwise.
@@ -47,9 +47,9 @@ divsufsort(const unsigned char *T, int *SA, int n, int openMP);
/**
* Constructs the burrows-wheeler transformed string of a given string.
- * @param T [0..n-1] The input string.
- * @param U [0..n-1] The output string. (can be T)
- * @param A [0..n-1] The temporary array. (can be NULL)
+ * @param T [0..n-1] The input string.
+ * @param U [0..n-1] The output string. (can be T)
+ * @param A [0..n-1] The temporary array. (can be NULL)
* @param n The length of the given string.
* @param num_indexes The length of secondary indexes array. (can be NULL)
* @param indexes The secondary indexes array. (can be NULL)
diff --git a/contrib/libs/zstd/lib/dictBuilder/fastcover.c b/contrib/libs/zstd/lib/dictBuilder/fastcover.c
index 3352859ada..7f21ecfdc3 100644
--- a/contrib/libs/zstd/lib/dictBuilder/fastcover.c
+++ b/contrib/libs/zstd/lib/dictBuilder/fastcover.c
@@ -1,13 +1,13 @@
-/*
+/*
* Copyright (c) Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under both the BSD-style license (found in the
- * LICENSE file in the root directory of this source tree) and the GPLv2 (found
- * in the COPYING file in the root directory of this source tree).
- * You may select, at your option, one of the above-listed licenses.
- */
-
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
/*-*************************************
* Dependencies
***************************************/
@@ -20,10 +20,10 @@
# define ZDICT_STATIC_LINKING_ONLY
#endif
-#include "../common/mem.h" /* read */
-#include "../common/pool.h"
-#include "../common/threading.h"
-#include "../common/zstd_internal.h" /* includes zstd.h */
+#include "../common/mem.h" /* read */
+#include "../common/pool.h"
+#include "../common/threading.h"
+#include "../common/zstd_internal.h" /* includes zstd.h */
#include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
#include "../zdict.h"
#include "cover.h"
@@ -306,10 +306,10 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
* Prepare a context for dictionary building.
* The context is only dependent on the parameter `d` and can used multiple
* times.
- * Returns 0 on success or error code on error.
+ * Returns 0 on success or error code on error.
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
*/
-static size_t
+static size_t
FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples,
@@ -329,19 +329,19 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
(unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
- return ERROR(srcSize_wrong);
+ return ERROR(srcSize_wrong);
}
/* Check if there are at least 5 training samples */
if (nbTrainSamples < 5) {
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
- return ERROR(srcSize_wrong);
+ return ERROR(srcSize_wrong);
}
/* Check if there's testing sample */
if (nbTestSamples < 1) {
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
- return ERROR(srcSize_wrong);
+ return ERROR(srcSize_wrong);
}
/* Zero the context */
@@ -366,7 +366,7 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
if (ctx->offsets == NULL) {
DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
FASTCOVER_ctx_destroy(ctx);
- return ERROR(memory_allocation);
+ return ERROR(memory_allocation);
}
/* Fill offsets from the samplesSizes */
@@ -383,13 +383,13 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
if (ctx->freqs == NULL) {
DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
FASTCOVER_ctx_destroy(ctx);
- return ERROR(memory_allocation);
+ return ERROR(memory_allocation);
}
DISPLAYLEVEL(2, "Computing frequencies\n");
FASTCOVER_computeFrequency(ctx->freqs, ctx);
- return 0;
+ return 0;
}
@@ -482,7 +482,7 @@ static void FASTCOVER_tryParameters(void* opaque)
U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
/* Allocate space for hash table, dict, and freqs */
BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
- COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
+ COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
if (!segmentFreqs || !dict || !freqs) {
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
@@ -492,24 +492,24 @@ static void FASTCOVER_tryParameters(void* opaque)
memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
/* Build the dictionary */
{ const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity,
- parameters, segmentFreqs);
-
+ parameters, segmentFreqs);
+
const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
- ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
- totalCompressedSize);
-
- if (COVER_dictSelectionIsError(selection)) {
- DISPLAYLEVEL(1, "Failed to select dictionary\n");
+ ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
+ totalCompressedSize);
+
+ if (COVER_dictSelectionIsError(selection)) {
+ DISPLAYLEVEL(1, "Failed to select dictionary\n");
goto _cleanup;
}
}
_cleanup:
- free(dict);
- COVER_best_finish(data->best, parameters, selection);
+ free(dict);
+ COVER_best_finish(data->best, parameters, selection);
free(data);
free(segmentFreqs);
- COVER_dictSelectionFree(selection);
+ COVER_dictSelectionFree(selection);
free(freqs);
}
@@ -524,7 +524,7 @@ FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams,
coverParams->nbThreads = fastCoverParams.nbThreads;
coverParams->splitPoint = fastCoverParams.splitPoint;
coverParams->zParams = fastCoverParams.zParams;
- coverParams->shrinkDict = fastCoverParams.shrinkDict;
+ coverParams->shrinkDict = fastCoverParams.shrinkDict;
}
@@ -541,7 +541,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
fastCoverParams->f = f;
fastCoverParams->accel = accel;
fastCoverParams->zParams = coverParams.zParams;
- fastCoverParams->shrinkDict = coverParams.shrinkDict;
+ fastCoverParams->shrinkDict = coverParams.shrinkDict;
}
@@ -568,11 +568,11 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
parameters.accel)) {
DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
- return ERROR(parameter_outOfBound);
+ return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
- return ERROR(srcSize_wrong);
+ return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -582,14 +582,14 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
/* Assign corresponding FASTCOVER_accel_t to accelParams*/
accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
/* Initialize context */
- {
- size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+ {
+ size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
coverParams.d, parameters.splitPoint, parameters.f,
- accelParams);
- if (ZSTD_isError(initVal)) {
- DISPLAYLEVEL(1, "Failed to initialize context\n");
- return initVal;
- }
+ accelParams);
+ if (ZSTD_isError(initVal)) {
+ DISPLAYLEVEL(1, "Failed to initialize context\n");
+ return initVal;
+ }
}
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
/* Build the dictionary */
@@ -637,7 +637,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
- const unsigned shrinkDict = 0;
+ const unsigned shrinkDict = 0;
/* Local variables */
const int displayLevel = (int)parameters->zParams.notificationLevel;
unsigned iteration = 1;
@@ -649,19 +649,19 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
/* Checks */
if (splitPoint <= 0 || splitPoint > 1) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
- return ERROR(parameter_outOfBound);
+ return ERROR(parameter_outOfBound);
}
if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
- return ERROR(parameter_outOfBound);
+ return ERROR(parameter_outOfBound);
}
if (kMinK < kMaxD || kMaxK < kMinK) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
- return ERROR(parameter_outOfBound);
+ return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
- return ERROR(srcSize_wrong);
+ return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
@@ -688,14 +688,14 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
/* Initialize the context for this value of d */
FASTCOVER_ctx_t ctx;
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
- {
- size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
- if (ZSTD_isError(initVal)) {
- LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
- COVER_best_destroy(&best);
- POOL_free(pool);
- return initVal;
- }
+ {
+ size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
+ if (ZSTD_isError(initVal)) {
+ LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+ COVER_best_destroy(&best);
+ POOL_free(pool);
+ return initVal;
+ }
}
if (!warned) {
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
@@ -712,7 +712,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
COVER_best_destroy(&best);
FASTCOVER_ctx_destroy(&ctx);
POOL_free(pool);
- return ERROR(memory_allocation);
+ return ERROR(memory_allocation);
}
data->ctx = &ctx;
data->best = &best;
@@ -722,7 +722,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
data->parameters.d = d;
data->parameters.splitPoint = splitPoint;
data->parameters.steps = kSteps;
- data->parameters.shrinkDict = shrinkDict;
+ data->parameters.shrinkDict = shrinkDict;
data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel;
/* Check the parameters */
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
diff --git a/contrib/libs/zstd/lib/dictBuilder/zdict.c b/contrib/libs/zstd/lib/dictBuilder/zdict.c
index 587df6b861..9ae0608c0c 100644
--- a/contrib/libs/zstd/lib/dictBuilder/zdict.c
+++ b/contrib/libs/zstd/lib/dictBuilder/zdict.c
@@ -1,12 +1,12 @@
/*
* Copyright (c) Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
+ * All rights reserved.
+ *
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
- */
+ */
/*-**************************************
@@ -46,11 +46,11 @@
#endif
#define HUF_STATIC_LINKING_ONLY
-#include "../common/mem.h" /* read */
-#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
-#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
-#include "../common/zstd_internal.h" /* includes zstd.h */
-#include <contrib/libs/xxhash/xxhash.h> /* XXH64 */
+#include "../common/mem.h" /* read */
+#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
+#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
+#include "../common/zstd_internal.h" /* includes zstd.h */
+#include <contrib/libs/xxhash/xxhash.h> /* XXH64 */
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
#include "../zdict.h"
#include "divsufsort.h"
@@ -63,7 +63,7 @@
#define MB *(1 <<20)
#define GB *(1U<<30)
-#define DICTLISTSIZE_DEFAULT 10000
+#define DICTLISTSIZE_DEFAULT 10000
#define NOISELENGTH 32
@@ -76,18 +76,18 @@ static const U32 g_selectivity_default = 9;
#undef DISPLAY
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
#undef DISPLAYLEVEL
-#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
+#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
-static void ZDICT_printHex(const void* ptr, size_t length)
+static void ZDICT_printHex(const void* ptr, size_t length)
{
const BYTE* const b = (const BYTE*)ptr;
size_t u;
for (u=0; u<length; u++) {
BYTE c = b[u];
if (c<32 || c>126) c = '.'; /* non-printable char */
- DISPLAY("%c", c);
+ DISPLAY("%c", c);
}
}
@@ -99,34 +99,34 @@ unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); }
const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
-unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
-{
- if (dictSize < 8) return 0;
+unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
+{
+ if (dictSize < 8) return 0;
if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0;
- return MEM_readLE32((const char*)dictBuffer + 4);
-}
-
-size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
-{
- size_t headerSize;
- if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
-
+ return MEM_readLE32((const char*)dictBuffer + 4);
+}
+
+size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
+{
+ size_t headerSize;
+ if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
+
{ ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
- U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
+ U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
if (!bs || !wksp) {
- headerSize = ERROR(memory_allocation);
- } else {
- ZSTD_reset_compressedBlockState(bs);
+ headerSize = ERROR(memory_allocation);
+ } else {
+ ZSTD_reset_compressedBlockState(bs);
headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize);
- }
-
- free(bs);
- free(wksp);
- }
-
- return headerSize;
-}
-
+ }
+
+ free(bs);
+ free(wksp);
+ }
+
+ return headerSize;
+}
+
/*-********************************************************
* Dictionary training functions
**********************************************************/
@@ -248,7 +248,7 @@ static void ZDICT_initDictItem(dictItem* d)
static dictItem ZDICT_analyzePos(
BYTE* doneMarks,
const int* suffix, U32 start,
- const void* buffer, U32 minRatio, U32 notificationLevel)
+ const void* buffer, U32 minRatio, U32 notificationLevel)
{
U32 lengthList[LLIMIT] = {0};
U32 cumulLength[LLIMIT] = {0};
@@ -435,23 +435,23 @@ static int isIncluded(const void* in, const void* container, size_t length)
static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const void* buffer)
{
const U32 tableSize = table->pos;
- const U32 eltEnd = elt.pos + elt.length;
+ const U32 eltEnd = elt.pos + elt.length;
const char* const buf = (const char*) buffer;
/* tail overlap */
U32 u; for (u=1; u<tableSize; u++) {
if (u==eltNbToSkip) continue;
- if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */
+ if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */
/* append */
U32 const addedLength = table[u].pos - elt.pos;
table[u].length += addedLength;
table[u].pos = elt.pos;
table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
- table[u].savings += elt.length / 8; /* rough approx bonus */
+ table[u].savings += elt.length / 8; /* rough approx bonus */
elt = table[u];
- /* sort : improve rank */
+ /* sort : improve rank */
while ((u>1) && (table[u-1].savings < elt.savings))
- table[u] = table[u-1], u--;
+ table[u] = table[u-1], u--;
table[u] = elt;
return u;
} }
@@ -460,15 +460,15 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
for (u=1; u<tableSize; u++) {
if (u==eltNbToSkip) continue;
- if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
+ if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
/* append */
int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length);
- table[u].savings += elt.length / 8; /* rough approx bonus */
- if (addedLength > 0) { /* otherwise, elt fully included into existing */
+ table[u].savings += elt.length / 8; /* rough approx bonus */
+ if (addedLength > 0) { /* otherwise, elt fully included into existing */
table[u].length += addedLength;
table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
}
- /* sort : improve rank */
+ /* sort : improve rank */
elt = table[u];
while ((u>1) && (table[u-1].savings < elt.savings))
table[u] = table[u-1], u--;
@@ -552,15 +552,15 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
BYTE* doneMarks = (BYTE*)malloc((bufferSize+16)*sizeof(*doneMarks)); /* +16 for overflow security */
U32* filePos = (U32*)malloc(nbFiles * sizeof(*filePos));
size_t result = 0;
- clock_t displayClock = 0;
- clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
+ clock_t displayClock = 0;
+ clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
# undef DISPLAYUPDATE
-# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
- if (ZDICT_clockSpan(displayClock) > refreshRate) \
- { displayClock = clock(); DISPLAY(__VA_ARGS__); \
+# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
+ if (ZDICT_clockSpan(displayClock) > refreshRate) \
+ { displayClock = clock(); DISPLAY(__VA_ARGS__); \
if (notificationLevel>=4) fflush(stderr); } }
-
+
/* init */
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) {
@@ -585,8 +585,8 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
{ size_t pos;
for (pos=0; pos < bufferSize; pos++)
reverseSuffix[suffix[pos]] = (U32)pos;
- /* note filePos tracks borders between samples.
- It's not used at this stage, but planned to become useful in a later update */
+ /* note filePos tracks borders between samples.
+ It's not used at this stage, but planned to become useful in a later update */
filePos[0] = 0;
for (pos=1; pos<nbFiles; pos++)
filePos[pos] = (U32)(filePos[pos-1] + fileSizes[pos-1]);
@@ -598,7 +598,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
{ U32 cursor; for (cursor=0; cursor < bufferSize; ) {
dictItem solution;
if (doneMarks[cursor]) { cursor++; continue; }
- solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
+ solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
if (solution.length==0) { cursor++; continue; }
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
cursor += solution.length;
@@ -616,12 +616,12 @@ _cleanup:
static void ZDICT_fillNoise(void* buffer, size_t length)
{
- unsigned const prime1 = 2654435761U;
- unsigned const prime2 = 2246822519U;
- unsigned acc = prime1;
+ unsigned const prime1 = 2654435761U;
+ unsigned const prime2 = 2246822519U;
+ unsigned acc = prime1;
size_t p=0;
for (p=0; p<length; p++) {
- acc *= prime2;
+ acc *= prime2;
((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
}
}
@@ -636,19 +636,19 @@ typedef struct
#define MAXREPOFFSET 1024
-static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
+static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
const void* src, size_t srcSize,
U32 notificationLevel)
{
- size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog);
+ size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog);
size_t cSize;
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
{ size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict);
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
- }
+ }
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
@@ -731,9 +731,9 @@ static void ZDICT_flatLit(unsigned* countLit)
#define OFFCODE_MAX 30 /* only applicable to first block */
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
int compressionLevel,
- const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
- const void* dictBuffer, size_t dictBufferSize,
- unsigned notificationLevel)
+ const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
+ const void* dictBuffer, size_t dictBufferSize,
+ unsigned notificationLevel)
{
unsigned countLit[256];
HUF_CREATE_STATIC_CTABLE(hufTable, 255);
@@ -752,7 +752,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
size_t pos = 0, errorCode;
size_t eSize = 0;
size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
- size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
+ size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
BYTE* dstPtr = (BYTE*)dstBuffer;
/* init */
@@ -779,10 +779,10 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
/* collect stats on all samples */
for (u=0; u<nbFiles; u++) {
- ZDICT_countEStats(esr, &params,
- countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
- (const char*)srcBuffer + pos, fileSizes[u],
- notificationLevel);
+ ZDICT_countEStats(esr, &params,
+ countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
+ (const char*)srcBuffer + pos, fileSizes[u],
+ notificationLevel);
pos += fileSizes[u];
}
@@ -796,7 +796,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
/* analyze, build stats, starting with literals */
{ size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
if (HUF_isError(maxNbBits)) {
- eSize = maxNbBits;
+ eSize = maxNbBits;
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
goto _cleanup;
}
@@ -819,8 +819,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, /* useLowProbCount */ 1);
if (FSE_isError(errorCode)) {
- eSize = errorCode;
- DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
+ eSize = errorCode;
+ DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
goto _cleanup;
}
Offlog = (U32)errorCode;
@@ -828,8 +828,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML, /* useLowProbCount */ 1);
if (FSE_isError(errorCode)) {
- eSize = errorCode;
- DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
+ eSize = errorCode;
+ DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
goto _cleanup;
}
mlLog = (U32)errorCode;
@@ -837,8 +837,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL, /* useLowProbCount */ 1);
if (FSE_isError(errorCode)) {
- eSize = errorCode;
- DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
+ eSize = errorCode;
+ DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
goto _cleanup;
}
llLog = (U32)errorCode;
@@ -846,8 +846,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
/* write result to buffer */
{ size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
if (HUF_isError(hhSize)) {
- eSize = hhSize;
- DISPLAYLEVEL(1, "HUF_writeCTable error \n");
+ eSize = hhSize;
+ DISPLAYLEVEL(1, "HUF_writeCTable error \n");
goto _cleanup;
}
dstPtr += hhSize;
@@ -857,8 +857,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
{ size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
if (FSE_isError(ohSize)) {
- eSize = ohSize;
- DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
+ eSize = ohSize;
+ DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
goto _cleanup;
}
dstPtr += ohSize;
@@ -868,8 +868,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
{ size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
if (FSE_isError(mhSize)) {
- eSize = mhSize;
- DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
+ eSize = mhSize;
+ DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
goto _cleanup;
}
dstPtr += mhSize;
@@ -879,8 +879,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
{ size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
if (FSE_isError(lhSize)) {
- eSize = lhSize;
- DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
+ eSize = lhSize;
+ DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
goto _cleanup;
}
dstPtr += lhSize;
@@ -889,8 +889,8 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
}
if (maxDstSize<12) {
- eSize = ERROR(dstSize_tooSmall);
- DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
+ eSize = ERROR(dstSize_tooSmall);
+ DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
goto _cleanup;
}
# if 0
@@ -936,7 +936,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
BYTE header[HBUFFSIZE];
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
- U32 const notificationLevel = params.notificationLevel;
+ U32 const notificationLevel = params.notificationLevel;
/* The final dictionary content must be at least as large as the largest repcode */
size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue);
size_t paddingSize;
@@ -1021,14 +1021,14 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
/* calculate entropy tables */
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
DISPLAYLEVEL(2, "statistics ... \n");
- { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
+ { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
compressionLevel,
samplesBuffer, samplesSizes, nbSamples,
- (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize,
- notificationLevel);
- if (ZDICT_isError(eSize)) return eSize;
- hSize += eSize;
- }
+ (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize,
+ notificationLevel);
+ if (ZDICT_isError(eSize)) return eSize;
+ hSize += eSize;
+ }
/* add dictionary header (after entropy tables) */
MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY);
@@ -1037,7 +1037,7 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
U32 const dictID = params.dictID ? params.dictID : compliantID;
MEM_writeLE32((char*)dictBuffer+4, dictID);
}
-
+
if (hSize + dictContentSize < dictBufferCapacity)
memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
return MIN(dictBufferCapacity, hSize+dictContentSize);
@@ -1052,7 +1052,7 @@ static size_t ZDICT_trainFromBuffer_unsafe_legacy(
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_legacy_params_t params)
{
- U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
+ U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
unsigned const selectivity = params.selectivityLevel == 0 ? g_selectivity_default : params.selectivityLevel;
unsigned const minRep = (selectivity > 30) ? MINRATIO : nbSamples >> selectivity;
@@ -1081,18 +1081,18 @@ static size_t ZDICT_trainFromBuffer_unsafe_legacy(
unsigned const dictContentSize = ZDICT_dictSize(dictList);
unsigned u;
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", (unsigned)dictList[0].pos-1, dictContentSize);
- DISPLAYLEVEL(3, "list %u best segments \n", nb-1);
- for (u=1; u<nb; u++) {
+ DISPLAYLEVEL(3, "list %u best segments \n", nb-1);
+ for (u=1; u<nb; u++) {
unsigned const pos = dictList[u].pos;
unsigned const length = dictList[u].length;
- U32 const printedLength = MIN(40, length);
+ U32 const printedLength = MIN(40, length);
if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) {
free(dictList);
- return ERROR(GENERIC); /* should never happen */
+ return ERROR(GENERIC); /* should never happen */
}
DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
u, length, pos, (unsigned)dictList[u].savings);
- ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
+ ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
DISPLAYLEVEL(3, "| \n");
} }
@@ -1110,11 +1110,11 @@ static size_t ZDICT_trainFromBuffer_unsafe_legacy(
}
}
- if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
+ if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
unsigned proposedSelectivity = selectivity-1;
while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (unsigned)maxDictSize);
- DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
+ DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
}
@@ -1160,7 +1160,7 @@ size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
size_t result;
void* newBuff;
size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
- if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0; /* not enough content => no dictionary */
+ if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0; /* not enough content => no dictionary */
newBuff = malloc(sBuffSize + NOISELENGTH);
if (!newBuff) return ERROR(memory_allocation);