diff options
author | thegeorg <thegeorg@yandex-team.com> | 2023-02-27 12:38:58 +0300 |
---|---|---|
committer | thegeorg <thegeorg@yandex-team.com> | 2023-02-27 12:38:58 +0300 |
commit | e82ffade6959fe8feaf41fe12a57d9f56873be40 (patch) | |
tree | 784bf61d9ca6cab4beabc995186d9b41e762ae60 /contrib/libs/zstd/programs | |
parent | e58cceed352de42c1526ab4eecdfeee158b1ede3 (diff) | |
download | ydb-e82ffade6959fe8feaf41fe12a57d9f56873be40.tar.gz |
Update contrib/libs/zstd to 1.5.4
Diffstat (limited to 'contrib/libs/zstd/programs')
23 files changed, 2522 insertions, 1180 deletions
diff --git a/contrib/libs/zstd/programs/README.md b/contrib/libs/zstd/programs/README.md index 5570f90c3b..1b9f47cbba 100644 --- a/contrib/libs/zstd/programs/README.md +++ b/contrib/libs/zstd/programs/README.md @@ -164,6 +164,7 @@ Advanced arguments : --filelist FILE : read list of files to operate upon from FILE --output-dir-flat DIR : processed files are stored into DIR --output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure +--[no-]asyncio : use asynchronous IO (default: enabled) --[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled). If specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate). -- : All arguments after "--" are treated as files @@ -208,7 +209,7 @@ Benchmark arguments : -b# : benchmark file(s), using # compression level (default: 3) -e# : test all compression levels successively from -b# to -e# (default: 1) -i# : minimum evaluation time in seconds (default: 3s) - -B# : cut file into independent blocks of size # (default: no block) + -B# : cut file into independent chunks of size # (default: no chunking) -S : output one benchmark result per input file (default: consolidated result) --priority=rt : set process priority to real-time ``` @@ -275,7 +276,7 @@ compression speed (for lower levels) with minimal change in compression ratio. The below table illustrates this on the [Silesia compression corpus]. -[Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia +[Silesia compression corpus]: https://sun.aei.polsl.pl//~sdeor/index.php?page=silesia | Method | Compression ratio | Compression speed | Decompression speed | |:-------|------------------:|------------------:|---------------------:| diff --git a/contrib/libs/zstd/programs/benchfn.c b/contrib/libs/zstd/programs/benchfn.c index 1aadbdd913..8e6726f8dc 100644 --- a/contrib/libs/zstd/programs/benchfn.c +++ b/contrib/libs/zstd/programs/benchfn.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -229,9 +229,9 @@ BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont, cont->timeSpent_ns += (unsigned long long)loopDuration_ns; /* estimate nbLoops for next run to last approximately 1 second */ - if (loopDuration_ns > (runBudget_ns / 50)) { + if (loopDuration_ns > ((double)runBudget_ns / 50)) { double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); - cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1; + cont->nbLoops = (unsigned)((double)runBudget_ns / fastestRun_ns) + 1; } else { /* previous run was too short : blindly increase workload by x multiplier */ const unsigned multiplier = 10; @@ -239,7 +239,7 @@ BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont, cont->nbLoops *= multiplier; } - if(loopDuration_ns < runTimeMin_ns) { + if(loopDuration_ns < (double)runTimeMin_ns) { /* don't report results for which benchmark run time was too small : increased risks of rounding errors */ assert(completed == 0); continue; diff --git a/contrib/libs/zstd/programs/benchfn.h b/contrib/libs/zstd/programs/benchfn.h index 590f292eaa..1bd93d1351 100644 --- a/contrib/libs/zstd/programs/benchfn.h +++ b/contrib/libs/zstd/programs/benchfn.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -123,7 +123,7 @@ BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome); /* when benchmark failed, it means one invocation of `benchFn` failed. * The failure was detected by `errorFn`, operating on return values of `benchFn`. * Returns the faulty return value. - * note : this function will abort() program execution if benchmark did not failed. + * note : this function will abort() program execution if benchmark did not fail. * always check if benchmark failed first ! */ size_t BMK_extract_errorResult(BMK_runOutcome_t outcome); diff --git a/contrib/libs/zstd/programs/benchzstd.c b/contrib/libs/zstd/programs/benchzstd.c index 9dc76a6f3e..63ecd99d51 100644 --- a/contrib/libs/zstd/programs/benchzstd.c +++ b/contrib/libs/zstd/programs/benchzstd.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -13,7 +13,7 @@ * Tuning parameters ****************************************/ #ifndef BMK_TIMETEST_DEFAULT_S /* default minimum time per test */ -#define BMK_TIMETEST_DEFAULT_S 3 +# define BMK_TIMETEST_DEFAULT_S 3 #endif @@ -327,26 +327,31 @@ BMK_benchMemAdvancedNoAlloc( /* init */ memset(&benchResult, 0, sizeof(benchResult)); if (strlen(displayName)>17) displayName += strlen(displayName) - 17; /* display last 17 characters */ - if (adv->mode == BMK_decodeOnly) { /* benchmark only decompression : source must be already compressed */ + if (adv->mode == BMK_decodeOnly) { + /* benchmark only decompression : source must be already compressed */ const char* srcPtr = (const char*)srcBuffer; U64 totalDSize64 = 0; U32 fileNb; for (fileNb=0; fileNb<nbFiles; fileNb++) { U64 const fSize64 = ZSTD_findDecompressedSize(srcPtr, fileSizes[fileNb]); - if (fSize64==0) RETURN_ERROR(32, BMK_benchOutcome_t, "Impossible to determine original size "); + if (fSize64 == ZSTD_CONTENTSIZE_UNKNOWN) { + RETURN_ERROR(32, BMK_benchOutcome_t, "Decompressed size cannot be determined: cannot benchmark"); + } + if (fSize64 == ZSTD_CONTENTSIZE_ERROR) { + RETURN_ERROR(32, BMK_benchOutcome_t, "Error while trying to assess decompressed size: data may be invalid"); + } totalDSize64 += fSize64; srcPtr += fileSizes[fileNb]; } { size_t const decodedSize = (size_t)totalDSize64; assert((U64)decodedSize == totalDSize64); /* check overflow */ free(*resultBufferPtr); + if (totalDSize64 > decodedSize) { /* size_t overflow */ + RETURN_ERROR(32, BMK_benchOutcome_t, "decompressed size is too large for local system"); + } *resultBufferPtr = malloc(decodedSize); if (!(*resultBufferPtr)) { - RETURN_ERROR(33, BMK_benchOutcome_t, "not enough memory"); - } - if (totalDSize64 > decodedSize) { /* size_t overflow */ - free(*resultBufferPtr); - RETURN_ERROR(32, BMK_benchOutcome_t, "original size is too large"); + RETURN_ERROR(33, BMK_benchOutcome_t, "allocation error: not enough memory"); } cSize = srcSize; srcSize = decodedSize; @@ -387,6 +392,10 @@ BMK_benchMemAdvancedNoAlloc( RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1); } + if (!UTIL_support_MT_measurements() && adv->nbWorkers > 1) { + OUTPUTLEVEL(2, "Warning : time measurements may be incorrect in multithreading mode... \n") + } + /* Bench */ { U64 const crcOrig = (adv->mode == BMK_decodeOnly) ? 0 : XXH64(srcBuffer, srcSize, 0); # define NB_MARKS 4 @@ -442,7 +451,7 @@ BMK_benchMemAdvancedNoAlloc( BMK_runOutcome_t const cOutcome = BMK_benchTimedFn( timeStateCompress, cbp); if (!BMK_isSuccessful_runOutcome(cOutcome)) { - return BMK_benchOutcome_error(); + RETURN_ERROR(30, BMK_benchOutcome_t, "compression error"); } { BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome); @@ -470,7 +479,7 @@ BMK_benchMemAdvancedNoAlloc( BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp); if(!BMK_isSuccessful_runOutcome(dOutcome)) { - return BMK_benchOutcome_error(); + RETURN_ERROR(30, BMK_benchOutcome_t, "decompression error"); } { BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome); @@ -594,7 +603,7 @@ BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, void* resultBuffer = srcSize ? malloc(srcSize) : NULL; - int allocationincomplete = !srcPtrs || !srcSizes || !cPtrs || + int const allocationincomplete = !srcPtrs || !srcSizes || !cPtrs || !cSizes || !cCapacities || !resPtrs || !resSizes || !timeStateCompress || !timeStateDecompress || !cctx || !dctx || diff --git a/contrib/libs/zstd/programs/benchzstd.h b/contrib/libs/zstd/programs/benchzstd.h index 11ac85da7f..aa683dfc25 100644 --- a/contrib/libs/zstd/programs/benchzstd.h +++ b/contrib/libs/zstd/programs/benchzstd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/libs/zstd/programs/datagen.c b/contrib/libs/zstd/programs/datagen.c index 3b4f9e5c7b..ddc690bb1b 100644 --- a/contrib/libs/zstd/programs/datagen.c +++ b/contrib/libs/zstd/programs/datagen.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/libs/zstd/programs/datagen.h b/contrib/libs/zstd/programs/datagen.h index b76ae2a222..ca72700063 100644 --- a/contrib/libs/zstd/programs/datagen.h +++ b/contrib/libs/zstd/programs/datagen.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/libs/zstd/programs/dibio.c b/contrib/libs/zstd/programs/dibio.c index d19f954486..26ebe5ca1d 100644 --- a/contrib/libs/zstd/programs/dibio.c +++ b/contrib/libs/zstd/programs/dibio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -27,10 +27,11 @@ #include <string.h> /* memset */ #include <stdio.h> /* fprintf, fopen, ftello64 */ #include <errno.h> /* errno */ -#include <assert.h> #include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */ +#include "../lib/common/debug.h" /* assert */ #include "../lib/common/mem.h" /* read */ +#include "../lib/zstd_errors.h" #include "dibio.h" @@ -127,8 +128,11 @@ static int DiB_loadFiles( while ( nbSamplesLoaded < sstSize && fileIndex < nbFiles ) { size_t fileDataLoaded; S64 const fileSize = DiB_getFileSize(fileNamesTable[fileIndex]); - if (fileSize <= 0) /* skip if zero-size or file error */ + if (fileSize <= 0) { + /* skip if zero-size or file error */ + ++fileIndex; continue; + } f = fopen( fileNamesTable[fileIndex], "rb"); if (f == NULL) @@ -193,7 +197,8 @@ static U32 DiB_rand(U32* src) static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) { U32 seed = 0xFD2FB528; unsigned i; - assert(nbFiles >= 1); + if (nbFiles == 0) + return; for (i = nbFiles - 1; i > 0; --i) { unsigned const j = DiB_rand(&seed) % (i + 1); const char* const tmp = fileNamesTable[j]; @@ -269,21 +274,20 @@ static fileStats DiB_fileStats(const char** fileNamesTable, int nbFiles, size_t int n; memset(&fs, 0, sizeof(fs)); - // We assume that if chunking is requested, the chunk size is < SAMPLESIZE_MAX + /* We assume that if chunking is requested, the chunk size is < SAMPLESIZE_MAX */ assert( chunkSize <= SAMPLESIZE_MAX ); for (n=0; n<nbFiles; n++) { S64 const fileSize = DiB_getFileSize(fileNamesTable[n]); - // TODO: is there a minimum sample size? What if the file is 1-byte? + /* TODO: is there a minimum sample size? What if the file is 1-byte? */ if (fileSize == 0) { DISPLAYLEVEL(3, "Sample file '%s' has zero size, skipping...\n", fileNamesTable[n]); continue; } /* the case where we are breaking up files in sample chunks */ - if (chunkSize > 0) - { - // TODO: is there a minimum sample size? Can we have a 1-byte sample? + if (chunkSize > 0) { + /* TODO: is there a minimum sample size? Can we have a 1-byte sample? */ fs.nbSamples += (int)((fileSize + chunkSize-1) / chunkSize); fs.totalSizeToLoad += fileSize; } @@ -350,7 +354,7 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize, } /* Checks */ - if ((!sampleSizes) || (!srcBuffer) || (!dictBuffer)) + if ((fs.nbSamples && !sampleSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */ if (fs.oneSampleTooLarge) { DISPLAYLEVEL(2, "! Warning : some sample(s) are very large \n"); @@ -379,7 +383,7 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize, srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, fileNamesTable, nbFiles, chunkSize, displayLevel); - { size_t dictSize; + { size_t dictSize = ZSTD_error_GENERIC; if (params) { DiB_fillNoise((char*)srcBuffer + loadedSize, NOISELENGTH); /* guard band, for end of buffer condition */ dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, @@ -399,8 +403,7 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize, dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer, sampleSizes, nbSamplesLoaded, *coverParams); } - } else { - assert(fastCoverParams != NULL); + } else if (fastCoverParams != NULL) { if (optimize) { dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, srcBuffer, sampleSizes, nbSamplesLoaded, @@ -415,6 +418,8 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize, dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, srcBuffer, sampleSizes, nbSamplesLoaded, *fastCoverParams); } + } else { + assert(0 /* Impossible */); } if (ZDICT_isError(dictSize)) { DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */ diff --git a/contrib/libs/zstd/programs/dibio.h b/contrib/libs/zstd/programs/dibio.h index 666c1e6618..a96104c36d 100644 --- a/contrib/libs/zstd/programs/dibio.h +++ b/contrib/libs/zstd/programs/dibio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/libs/zstd/programs/fileio.c b/contrib/libs/zstd/programs/fileio.c index 0a0dfc42cb..3b885bc65f 100644 --- a/contrib/libs/zstd/programs/fileio.c +++ b/contrib/libs/zstd/programs/fileio.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -28,6 +28,7 @@ #include <stdio.h> /* fprintf, open, fdopen, fread, _fileno, stdin, stdout */ #include <stdlib.h> /* malloc, free */ #include <string.h> /* strcmp, strlen */ +#include <time.h> /* clock_t, to measure process time */ #include <fcntl.h> /* O_WRONLY */ #include <assert.h> #include <errno.h> /* errno */ @@ -40,8 +41,12 @@ # include <io.h> #endif -#include "../lib/common/mem.h" /* U32, U64 */ #include "fileio.h" +#include "fileio_asyncio.h" +#include "fileio_common.h" + +FIO_display_prefs_t g_display_prefs = {2, FIO_ps_auto}; +UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */ #include "../lib/zstd.h" @@ -65,6 +70,40 @@ # error #include <lz4.h> #endif +char const* FIO_zlibVersion(void) +{ +#if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS) + return zlibVersion(); +#else + return "Unsupported"; +#endif +} + +char const* FIO_lz4Version(void) +{ +#if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS) + /* LZ4_versionString() added in v1.7.3 */ +# if LZ4_VERSION_NUMBER >= 10703 + return LZ4_versionString(); +# else +# define ZSTD_LZ4_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE +# define ZSTD_LZ4_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LZ4_VERSION) + return ZSTD_LZ4_VERSION_STRING; +# endif +#else + return "Unsupported"; +#endif +} + +char const* FIO_lzmaVersion(void) +{ +#if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS) + return lzma_version_string(); +#else + return "Unsupported"; +#endif +} + /*-************************************* * Constants @@ -75,69 +114,17 @@ #define FNSPACE 30 /* Default file permissions 0666 (modulated by umask) */ +/* Temporary restricted file permissions are used when we're going to + * chmod/chown at the end of the operation. */ #if !defined(_WIN32) /* These macros aren't defined on windows. */ #define DEFAULT_FILE_PERMISSIONS (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH) +#define TEMPORARY_FILE_PERMISSIONS (S_IRUSR|S_IWUSR) #else #define DEFAULT_FILE_PERMISSIONS (0666) +#define TEMPORARY_FILE_PERMISSIONS (0600) #endif -/*-************************************* -* Macros -***************************************/ -#define KB *(1 <<10) -#define MB *(1 <<20) -#define GB *(1U<<30) -#undef MAX -#define MAX(a,b) ((a)>(b) ? (a) : (b)) - -struct FIO_display_prefs_s { - int displayLevel; /* 0 : no display; 1: errors; 2: + result + interaction + warnings; 3: + progression; 4: + information */ - FIO_progressSetting_e progressSetting; -}; - -static FIO_display_prefs_t g_display_prefs = {2, FIO_ps_auto}; - -#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) -#define DISPLAYOUT(...) fprintf(stdout, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) { if (g_display_prefs.displayLevel>=l) { DISPLAY(__VA_ARGS__); } } - -static const U64 g_refreshRate = SEC_TO_MICRO / 6; -static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; - -#define READY_FOR_UPDATE() ((g_display_prefs.progressSetting != FIO_ps_never) && UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) -#define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); } -#define DISPLAYUPDATE(l, ...) { \ - if (g_display_prefs.displayLevel>=l && (g_display_prefs.progressSetting != FIO_ps_never)) { \ - if (READY_FOR_UPDATE() || (g_display_prefs.displayLevel>=4)) { \ - DELAY_NEXT_UPDATE(); \ - DISPLAY(__VA_ARGS__); \ - if (g_display_prefs.displayLevel>=4) fflush(stderr); \ - } } } - -#undef MIN /* in case it would be already defined */ -#define MIN(a,b) ((a) < (b) ? (a) : (b)) - - -#define EXM_THROW(error, ...) \ -{ \ - DISPLAYLEVEL(1, "zstd: "); \ - DISPLAYLEVEL(5, "Error defined at %s, line %i : \n", __FILE__, __LINE__); \ - DISPLAYLEVEL(1, "error %i : ", error); \ - DISPLAYLEVEL(1, __VA_ARGS__); \ - DISPLAYLEVEL(1, " \n"); \ - exit(error); \ -} - -#define CHECK_V(v, f) \ - v = f; \ - if (ZSTD_isError(v)) { \ - DISPLAYLEVEL(5, "%s \n", #f); \ - EXM_THROW(11, "%s", ZSTD_getErrorName(v)); \ - } -#define CHECK(f) { size_t err; CHECK_V(err, f); } - - /*-************************************ * Signal (Ctrl-C trapping) **************************************/ @@ -237,7 +224,7 @@ static void ABRThandler(int sig) { } #endif -void FIO_addAbortHandler() +void FIO_addAbortHandler(void) { #if BACKTRACE_ENABLE signal(SIGABRT, ABRThandler); @@ -248,94 +235,6 @@ void FIO_addAbortHandler() #endif } - -/*-************************************************************ -* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW -***************************************************************/ -#if defined(_MSC_VER) && _MSC_VER >= 1400 -# define LONG_SEEK _fseeki64 -# define LONG_TELL _ftelli64 -#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ -# define LONG_SEEK fseeko -# define LONG_TELL ftello -#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__) -# define LONG_SEEK fseeko64 -# define LONG_TELL ftello64 -#elif defined(_WIN32) && !defined(__DJGPP__) -# include <windows.h> - static int LONG_SEEK(FILE* file, __int64 offset, int origin) { - LARGE_INTEGER off; - DWORD method; - off.QuadPart = offset; - if (origin == SEEK_END) - method = FILE_END; - else if (origin == SEEK_CUR) - method = FILE_CURRENT; - else - method = FILE_BEGIN; - - if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method)) - return 0; - else - return -1; - } - static __int64 LONG_TELL(FILE* file) { - LARGE_INTEGER off, newOff; - off.QuadPart = 0; - newOff.QuadPart = 0; - SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, &newOff, FILE_CURRENT); - return newOff.QuadPart; - } -#else -# define LONG_SEEK fseek -# define LONG_TELL ftell -#endif - - -/*-************************************* -* Parameters: FIO_prefs_t -***************************************/ - -/* typedef'd to FIO_prefs_t within fileio.h */ -struct FIO_prefs_s { - - /* Algorithm preferences */ - FIO_compressionType_t compressionType; - U32 sparseFileSupport; /* 0: no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */ - int dictIDFlag; - int checksumFlag; - int blockSize; - int overlapLog; - U32 adaptiveMode; - U32 useRowMatchFinder; - int rsyncable; - int minAdaptLevel; - int maxAdaptLevel; - int ldmFlag; - int ldmHashLog; - int ldmMinMatch; - int ldmBucketSizeLog; - int ldmHashRateLog; - size_t streamSrcSize; - size_t targetCBlockSize; - int srcSizeHint; - int testMode; - ZSTD_paramSwitch_e literalCompressionMode; - - /* IO preferences */ - U32 removeSrcFile; - U32 overwrite; - - /* Computation resources preferences */ - unsigned memLimit; - int nbWorkers; - - int excludeCompressedFiles; - int patchFromMode; - int contentSize; - int allowBlockDevices; -}; - /*-************************************* * Parameters: FIO_ctx_t ***************************************/ @@ -355,6 +254,18 @@ struct FIO_ctx_s { size_t totalBytesOutput; }; +static int FIO_shouldDisplayFileSummary(FIO_ctx_t const* fCtx) +{ + return fCtx->nbFilesTotal <= 1 || g_display_prefs.displayLevel >= 3; +} + +static int FIO_shouldDisplayMultipleFileSummary(FIO_ctx_t const* fCtx) +{ + int const shouldDisplay = (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1); + assert(shouldDisplay || FIO_shouldDisplayFileSummary(fCtx) || fCtx->nbFilesProcessed == 0); + return shouldDisplay; +} + /*-************************************* * Parameters: Initialization @@ -395,6 +306,8 @@ FIO_prefs_t* FIO_createPreferences(void) ret->literalCompressionMode = ZSTD_ps_auto; ret->excludeCompressedFiles = 0; ret->allowBlockDevices = 0; + ret->asyncIO = AIO_supported(); + ret->passThrough = -1; return ret; } @@ -443,13 +356,13 @@ void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t comp void FIO_overwriteMode(FIO_prefs_t* const prefs) { prefs->overwrite = 1; } -void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse) { prefs->sparseFileSupport = sparse; } +void FIO_setSparseWrite(FIO_prefs_t* const prefs, int sparse) { prefs->sparseFileSupport = sparse; } void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag) { prefs->dictIDFlag = dictIDFlag; } void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag) { prefs->checksumFlag = checksumFlag; } -void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag) { prefs->removeSrcFile = (flag>0); } +void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, int flag) { prefs->removeSrcFile = (flag!=0); } void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit) { prefs->memLimit = memLimit; } @@ -476,7 +389,7 @@ void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog){ prefs->overlapLog = overlapLog; } -void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt) { +void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, int adapt) { if ((adapt>0) && (prefs->nbWorkers==0)) EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n"); prefs->adaptiveMode = adapt; @@ -558,6 +471,20 @@ void FIO_setContentSize(FIO_prefs_t* const prefs, int value) prefs->contentSize = value != 0; } +void FIO_setAsyncIOFlag(FIO_prefs_t* const prefs, int value) { +#ifdef ZSTD_MULTITHREAD + prefs->asyncIO = value; +#else + (void) prefs; + (void) value; + DISPLAYLEVEL(2, "Note : asyncio is disabled (lack of multithreading support) \n"); +#endif +} + +void FIO_setPassThroughFlag(FIO_prefs_t* const prefs, int value) { + prefs->passThrough = (value != 0); +} + /* FIO_ctx_t functions */ void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) { @@ -608,26 +535,26 @@ static int FIO_removeFile(const char* path) /** FIO_openSrcFile() : * condition : `srcFileName` must be non-NULL. `prefs` may be NULL. * @result : FILE* to `srcFileName`, or NULL if it fails */ -static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName) +static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName, stat_t* statbuf) { - stat_t statbuf; int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0; assert(srcFileName != NULL); + assert(statbuf != NULL); if (!strcmp (srcFileName, stdinmark)) { DISPLAYLEVEL(4,"Using stdin for input \n"); SET_BINARY_MODE(stdin); return stdin; } - if (!UTIL_stat(srcFileName, &statbuf)) { + if (!UTIL_stat(srcFileName, statbuf)) { DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n", srcFileName, strerror(errno)); return NULL; } - if (!UTIL_isRegularFileStat(&statbuf) - && !UTIL_isFIFOStat(&statbuf) - && !(allowBlockDevices && UTIL_isBlockDevStat(&statbuf)) + if (!UTIL_isRegularFileStat(statbuf) + && !UTIL_isFIFOStat(statbuf) + && !(allowBlockDevices && UTIL_isBlockDevStat(statbuf)) ) { DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n", srcFileName); @@ -685,7 +612,7 @@ FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs, if (!prefs->overwrite) { if (g_display_prefs.displayLevel <= 1) { /* No interaction possible */ - DISPLAY("zstd: %s already exists; not overwritten \n", + DISPLAYLEVEL(1, "zstd: %s already exists; not overwritten \n", dstFileName); return NULL; } @@ -718,6 +645,17 @@ FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs, if (f == NULL) { DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno)); } + /* An increased buffer size can provide a significant performance boost on some platforms. + * Note that providing a NULL buf with a size that's not 0 is not defined in ANSI C, but is defined + * in an extension. There are three possibilities here - + * 1. Libc supports the extended version and everything is good. + * 2. Libc ignores the size when buf is NULL, in which case everything will continue as if we didn't + * call `setvbuf`. + * 3. We fail the call and execution continues but a warning message might be shown. + * In all cases due execution continues. For now, I believe that this is a more cost-effective + * solution than managing the buffers allocations ourselves (will require an API change). */ + if(setvbuf(f, NULL, _IOFBF, 1 MB)) + DISPLAYLEVEL(2, "Warning: setvbuf failed for %s\n", dstFileName); return f; } } @@ -728,23 +666,23 @@ FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs, * @return : loaded size * if fileName==NULL, returns 0 and a NULL pointer */ -static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_prefs_t* const prefs) +static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat) { FILE* fileHandle; U64 fileSize; - stat_t statbuf; assert(bufferPtr != NULL); + assert(dictFileStat != NULL); *bufferPtr = NULL; if (fileName == NULL) return 0; DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName); - if (!UTIL_stat(fileName, &statbuf)) { + if (!UTIL_stat(fileName, dictFileStat)) { EXM_THROW(31, "Stat failed on dictionary file %s: %s", fileName, strerror(errno)); } - if (!UTIL_isRegularFileStat(&statbuf)) { + if (!UTIL_isRegularFileStat(dictFileStat)) { EXM_THROW(32, "Dictionary %s must be a regular file.", fileName); } @@ -754,7 +692,7 @@ static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_p EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno)); } - fileSize = UTIL_getFileSizeStat(&statbuf); + fileSize = UTIL_getFileSizeStat(dictFileStat); { size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX; if (fileSize > dictSizeMax) { @@ -785,7 +723,7 @@ int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) { filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles); if (!filenameTableSorted) { - DISPLAY("Unable to malloc new str array, not checking for name collisions\n"); + DISPLAYLEVEL(1, "Allocation error during filename collision checking \n"); return 1; } @@ -802,7 +740,7 @@ int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) { prevElem = filenameTableSorted[0]; for (u = 1; u < nbFiles; ++u) { if (strcmp(prevElem, filenameTableSorted[u]) == 0) { - DISPLAY("WARNING: Two files have same filename: %s\n", prevElem); + DISPLAYLEVEL(2, "WARNING: Two files have same filename: %s\n", prevElem); } prevElem = filenameTableSorted[u]; } @@ -885,45 +823,89 @@ static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs, FIO_setMemLimit(prefs, (unsigned)maxSize); } -/* FIO_removeMultiFilesWarning() : +/* FIO_multiFilesConcatWarning() : + * This function handles logic when processing multiple files with -o or -c, displaying the appropriate warnings/prompts. * Returns 1 if the console should abort, 0 if console should proceed. - * This function handles logic when processing multiple files with -o, displaying the appropriate warnings/prompts. * - * If -f is specified, or there is just 1 file, zstd will always proceed as usual. - * If --rm is specified, there will be a prompt asking for user confirmation. - * If -f is specified with --rm, zstd will proceed as usual - * If -q is specified with --rm, zstd will abort pre-emptively - * If neither flag is specified, zstd will prompt the user for confirmation to proceed. - * If --rm is not specified, then zstd will print a warning to the user (which can be silenced with -q). - * However, if the output is stdout, we will always abort rather than displaying the warning prompt. + * If output is stdout or test mode is active, check that `--rm` disabled. + * + * If there is just 1 file to process, zstd will proceed as usual. + * If each file get processed into its own separate destination file, proceed as usual. + * + * When multiple files are processed into a single output, + * display a warning message, then disable --rm if it's set. + * + * If -f is specified or if output is stdout, just proceed. + * If output is set with -o, prompt for confirmation. */ -static int FIO_removeMultiFilesWarning(FIO_ctx_t* const fCtx, const FIO_prefs_t* const prefs, const char* outFileName, int displayLevelCutoff) +static int FIO_multiFilesConcatWarning(const FIO_ctx_t* fCtx, FIO_prefs_t* prefs, const char* outFileName, int displayLevelCutoff) { - int error = 0; - if (fCtx->nbFilesTotal > 1 && !prefs->overwrite) { - if (g_display_prefs.displayLevel <= displayLevelCutoff) { - if (prefs->removeSrcFile) { - DISPLAYLEVEL(1, "zstd: Aborting... not deleting files and processing into dst: %s\n", outFileName); - error = 1; - } - } else { - if (!strcmp(outFileName, stdoutmark)) { - DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into stdout. \n"); - } else { - DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into a single output file: %s \n", outFileName); - } - DISPLAYLEVEL(2, "The concatenated output CANNOT regenerate the original directory tree. \n") - if (prefs->removeSrcFile) { - if (fCtx->hasStdoutOutput) { - DISPLAYLEVEL(1, "Aborting. Use -f if you really want to delete the files and output to stdout\n"); - error = 1; - } else { - error = g_display_prefs.displayLevel > displayLevelCutoff && UTIL_requireUserConfirmation("This is a destructive operation. Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput); - } - } - } + if (fCtx->hasStdoutOutput) { + if (prefs->removeSrcFile) + /* this should not happen ; hard fail, to protect user's data + * note: this should rather be an assert(), but we want to be certain that user's data will not be wiped out in case it nonetheless happen */ + EXM_THROW(43, "It's not allowed to remove input files when processed output is piped to stdout. " + "This scenario is not supposed to be possible. " + "This is a programming error. File an issue for it to be fixed."); + } + if (prefs->testMode) { + if (prefs->removeSrcFile) + /* this should not happen ; hard fail, to protect user's data + * note: this should rather be an assert(), but we want to be certain that user's data will not be wiped out in case it nonetheless happen */ + EXM_THROW(43, "Test mode shall not remove input files! " + "This scenario is not supposed to be possible. " + "This is a programming error. File an issue for it to be fixed."); + return 0; } - return error; + + if (fCtx->nbFilesTotal == 1) return 0; + assert(fCtx->nbFilesTotal > 1); + + if (!outFileName) return 0; + + if (fCtx->hasStdoutOutput) { + DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into stdout. \n"); + } else { + DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into a single output file: %s \n", outFileName); + } + DISPLAYLEVEL(2, "The concatenated output CANNOT regenerate original file names nor directory structure. \n") + + /* multi-input into single output : --rm is not allowed */ + if (prefs->removeSrcFile) { + DISPLAYLEVEL(2, "Since it's a destructive operation, input files will not be removed. \n"); + prefs->removeSrcFile = 0; + } + + if (fCtx->hasStdoutOutput) return 0; + if (prefs->overwrite) return 0; + + /* multiple files concatenated into single destination file using -o without -f */ + if (g_display_prefs.displayLevel <= displayLevelCutoff) { + /* quiet mode => no prompt => fail automatically */ + DISPLAYLEVEL(1, "Concatenating multiple processed inputs into a single output loses file metadata. \n"); + DISPLAYLEVEL(1, "Aborting. \n"); + return 1; + } + /* normal mode => prompt */ + return UTIL_requireUserConfirmation("Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput); +} + +static ZSTD_inBuffer setInBuffer(const void* buf, size_t s, size_t pos) +{ + ZSTD_inBuffer i; + i.src = buf; + i.size = s; + i.pos = pos; + return i; +} + +static ZSTD_outBuffer setOutBuffer(void* buf, size_t s, size_t pos) +{ + ZSTD_outBuffer o; + o.dst = buf; + o.size = s; + o.pos = pos; + return o; } #ifndef ZSTD_NOCOMPRESS @@ -932,16 +914,13 @@ static int FIO_removeMultiFilesWarning(FIO_ctx_t* const fCtx, const FIO_prefs_t* * Compression ************************************************************************/ typedef struct { - FILE* srcFile; - FILE* dstFile; - void* srcBuffer; - size_t srcBufferSize; - void* dstBuffer; - size_t dstBufferSize; void* dictBuffer; size_t dictBufferSize; const char* dictFileName; + stat_t dictFileStat; ZSTD_CStream* cctx; + WritePoolCtx_t *writeCtx; + ReadPoolCtx_t *readCtx; } cRess_t; /** ZSTD_cycleLog() : @@ -973,8 +952,8 @@ static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs, if (cParams.strategy >= ZSTD_btopt) { DISPLAYLEVEL(1, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n"); DISPLAYLEVEL(1, "- Use --single-thread mode in the zstd cli\n"); - DISPLAYLEVEL(1, "- Set a larger targetLength (eg. --zstd=targetLength=4096)\n"); - DISPLAYLEVEL(1, "- Set a larger chainLog (eg. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX); + DISPLAYLEVEL(1, "- Set a larger targetLength (e.g. --zstd=targetLength=4096)\n"); + DISPLAYLEVEL(1, "- Set a larger chainLog (e.g. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX); DISPLAYLEVEL(1, "Also consider playing around with searchLog and hashLog\n"); } } @@ -990,9 +969,6 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, if (ress.cctx == NULL) EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx", strerror(errno)); - ress.srcBufferSize = ZSTD_CStreamInSize(); - ress.srcBuffer = malloc(ress.srcBufferSize); - ress.dstBufferSize = ZSTD_CStreamOutSize(); /* need to update memLimit before calling createDictBuffer * because of memLimit check inside it */ @@ -1000,10 +976,10 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize; FIO_adjustParamsForPatchFromMode(prefs, &comprParams, UTIL_getFileSize(dictFileName), ssSize > 0 ? ssSize : maxSrcFileSize, cLevel); } - ress.dstBuffer = malloc(ress.dstBufferSize); - ress.dictBufferSize = FIO_createDictBuffer(&ress.dictBuffer, dictFileName, prefs); /* works with dictFileName==NULL */ - if (!ress.srcBuffer || !ress.dstBuffer) - EXM_THROW(31, "allocation error : not enough memory"); + ress.dictBufferSize = FIO_createDictBuffer(&ress.dictBuffer, dictFileName, prefs, &ress.dictFileStat); /* works with dictFileName==NULL */ + + ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_CStreamOutSize()); + ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_CStreamInSize()); /* Advanced parameters, including dictionary */ if (dictFileName && (ress.dictBuffer==NULL)) @@ -1066,9 +1042,9 @@ static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, static void FIO_freeCResources(const cRess_t* const ress) { - free(ress->srcBuffer); - free(ress->dstBuffer); free(ress->dictBuffer); + AIO_WritePool_free(ress->writeCtx); + AIO_ReadPool_free(ress->readCtx); ZSTD_freeCStream(ress->cctx); /* never fails */ } @@ -1081,6 +1057,7 @@ FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but no { unsigned long long inFileSize = 0, outFileSize = 0; z_stream strm; + IOJob_t *writeJob = NULL; if (compressionLevel > Z_BEST_COMPRESSION) compressionLevel = Z_BEST_COMPRESSION; @@ -1091,56 +1068,65 @@ FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but no { int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED, 15 /* maxWindowLogSize */ + 16 /* gzip only */, - 8, Z_DEFAULT_STRATEGY); /* see http://www.zlib.net/manual.html */ + 8, Z_DEFAULT_STRATEGY); /* see https://www.zlib.net/manual.html */ if (ret != Z_OK) { EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret); } } + writeJob = AIO_WritePool_acquireJob(ress->writeCtx); strm.next_in = 0; strm.avail_in = 0; - strm.next_out = (Bytef*)ress->dstBuffer; - strm.avail_out = (uInt)ress->dstBufferSize; + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; while (1) { int ret; if (strm.avail_in == 0) { - size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile); - if (inSize == 0) break; - inFileSize += inSize; - strm.next_in = (z_const unsigned char*)ress->srcBuffer; - strm.avail_in = (uInt)inSize; + AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize()); + if (ress->readCtx->srcBufferLoaded == 0) break; + inFileSize += ress->readCtx->srcBufferLoaded; + strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer; + strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded; + } + + { + size_t const availBefore = strm.avail_in; + ret = deflate(&strm, Z_NO_FLUSH); + AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in); } - ret = deflate(&strm, Z_NO_FLUSH); + if (ret != Z_OK) EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret); - { size_t const cSize = ress->dstBufferSize - strm.avail_out; + { size_t const cSize = writeJob->bufferSize - strm.avail_out; if (cSize) { - if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize) - EXM_THROW(73, "Write error : cannot write to output file : %s ", strerror(errno)); + writeJob->usedBufferSize = cSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); outFileSize += cSize; - strm.next_out = (Bytef*)ress->dstBuffer; - strm.avail_out = (uInt)ress->dstBufferSize; - } } + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; + } } if (srcFileSize == UTIL_FILESIZE_UNKNOWN) { - DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", - (unsigned)(inFileSize>>20), - (double)outFileSize/inFileSize*100) + DISPLAYUPDATE_PROGRESS( + "\rRead : %u MB ==> %.2f%% ", + (unsigned)(inFileSize>>20), + (double)outFileSize/(double)inFileSize*100) } else { - DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%% ", - (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), - (double)outFileSize/inFileSize*100); + DISPLAYUPDATE_PROGRESS( + "\rRead : %u / %u MB ==> %.2f%% ", + (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), + (double)outFileSize/(double)inFileSize*100); } } while (1) { int const ret = deflate(&strm, Z_FINISH); - { size_t const cSize = ress->dstBufferSize - strm.avail_out; + { size_t const cSize = writeJob->bufferSize - strm.avail_out; if (cSize) { - if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize) - EXM_THROW(75, "Write error : %s ", strerror(errno)); + writeJob->usedBufferSize = cSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); outFileSize += cSize; - strm.next_out = (Bytef*)ress->dstBuffer; - strm.avail_out = (uInt)ress->dstBufferSize; - } } + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; + } } if (ret == Z_STREAM_END) break; if (ret != Z_BUF_ERROR) EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret); @@ -1151,6 +1137,8 @@ FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but no EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret); } } *readsize = inFileSize; + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); return outFileSize; } #endif @@ -1166,6 +1154,7 @@ FIO_compressLzmaFrame(cRess_t* ress, lzma_stream strm = LZMA_STREAM_INIT; lzma_action action = LZMA_RUN; lzma_ret ret; + IOJob_t *writeJob = NULL; if (compressionLevel < 0) compressionLevel = 0; if (compressionLevel > 9) compressionLevel = 9; @@ -1183,46 +1172,55 @@ FIO_compressLzmaFrame(cRess_t* ress, EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret); } + writeJob =AIO_WritePool_acquireJob(ress->writeCtx); + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; strm.next_in = 0; strm.avail_in = 0; - strm.next_out = (BYTE*)ress->dstBuffer; - strm.avail_out = ress->dstBufferSize; while (1) { if (strm.avail_in == 0) { - size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile); - if (inSize == 0) action = LZMA_FINISH; + size_t const inSize = AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize()); + if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH; inFileSize += inSize; - strm.next_in = (BYTE const*)ress->srcBuffer; - strm.avail_in = inSize; + strm.next_in = (BYTE const*)ress->readCtx->srcBuffer; + strm.avail_in = ress->readCtx->srcBufferLoaded; + } + + { + size_t const availBefore = strm.avail_in; + ret = lzma_code(&strm, action); + AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in); } - ret = lzma_code(&strm, action); if (ret != LZMA_OK && ret != LZMA_STREAM_END) EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret); - { size_t const compBytes = ress->dstBufferSize - strm.avail_out; + { size_t const compBytes = writeJob->bufferSize - strm.avail_out; if (compBytes) { - if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes) - EXM_THROW(85, "Write error : %s", strerror(errno)); + writeJob->usedBufferSize = compBytes; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); outFileSize += compBytes; - strm.next_out = (BYTE*)ress->dstBuffer; - strm.avail_out = ress->dstBufferSize; + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = writeJob->bufferSize; } } if (srcFileSize == UTIL_FILESIZE_UNKNOWN) - DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", + DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), - (double)outFileSize/inFileSize*100) + (double)outFileSize/(double)inFileSize*100) else - DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", + DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), - (double)outFileSize/inFileSize*100); + (double)outFileSize/(double)inFileSize*100); if (ret == LZMA_STREAM_END) break; } lzma_end(&strm); *readsize = inFileSize; + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); + return outFileSize; } #endif @@ -1248,15 +1246,18 @@ FIO_compressLz4Frame(cRess_t* ress, LZ4F_preferences_t prefs; LZ4F_compressionContext_t ctx; + IOJob_t* writeJob = AIO_WritePool_acquireJob(ress->writeCtx); + LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION); if (LZ4F_isError(errorCode)) EXM_THROW(31, "zstd: failed to create lz4 compression context"); memset(&prefs, 0, sizeof(prefs)); - assert(blockSize <= ress->srcBufferSize); + assert(blockSize <= ress->readCtx->base.jobBufferSize); - prefs.autoFlush = 1; + /* autoflush off to mitigate a bug in lz4<=1.9.3 for compression level 12 */ + prefs.autoFlush = 0; prefs.compressionLevel = compressionLevel; prefs.frameInfo.blockMode = LZ4F_blockLinked; prefs.frameInfo.blockSizeID = LZ4F_max64KB; @@ -1264,75 +1265,68 @@ FIO_compressLz4Frame(cRess_t* ress, #if LZ4_VERSION_NUMBER >= 10600 prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize; #endif - assert(LZ4F_compressBound(blockSize, &prefs) <= ress->dstBufferSize); + assert(LZ4F_compressBound(blockSize, &prefs) <= writeJob->bufferSize); { - size_t readSize; - size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs); + size_t headerSize = LZ4F_compressBegin(ctx, writeJob->buffer, writeJob->bufferSize, &prefs); if (LZ4F_isError(headerSize)) EXM_THROW(33, "File header generation failed : %s", LZ4F_getErrorName(headerSize)); - if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize) - EXM_THROW(34, "Write error : %s (cannot write header)", strerror(errno)); + writeJob->usedBufferSize = headerSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); outFileSize += headerSize; /* Read first block */ - readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile); - inFileSize += readSize; + inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize); /* Main Loop */ - while (readSize>0) { - size_t const outSize = LZ4F_compressUpdate(ctx, - ress->dstBuffer, ress->dstBufferSize, - ress->srcBuffer, readSize, NULL); + while (ress->readCtx->srcBufferLoaded) { + size_t inSize = MIN(blockSize, ress->readCtx->srcBufferLoaded); + size_t const outSize = LZ4F_compressUpdate(ctx, writeJob->buffer, writeJob->bufferSize, + ress->readCtx->srcBuffer, inSize, NULL); if (LZ4F_isError(outSize)) EXM_THROW(35, "zstd: %s: lz4 compression failed : %s", srcFileName, LZ4F_getErrorName(outSize)); outFileSize += outSize; if (srcFileSize == UTIL_FILESIZE_UNKNOWN) { - DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", + DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), - (double)outFileSize/inFileSize*100) + (double)outFileSize/(double)inFileSize*100) } else { - DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", + DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%", (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), - (double)outFileSize/inFileSize*100); + (double)outFileSize/(double)inFileSize*100); } /* Write Block */ - { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile); - if (sizeCheck != outSize) - EXM_THROW(36, "Write error : %s", strerror(errno)); - } + writeJob->usedBufferSize = outSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); /* Read next block */ - readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile); - inFileSize += readSize; + AIO_ReadPool_consumeBytes(ress->readCtx, inSize); + inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize); } - if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName); /* End of Stream mark */ - headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL); + headerSize = LZ4F_compressEnd(ctx, writeJob->buffer, writeJob->bufferSize, NULL); if (LZ4F_isError(headerSize)) EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s", srcFileName, LZ4F_getErrorName(headerSize)); - { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile); - if (sizeCheck != headerSize) - EXM_THROW(39, "Write error : %s (cannot write end of stream)", - strerror(errno)); - } + writeJob->usedBufferSize = headerSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); outFileSize += headerSize; } *readsize = inFileSize; LZ4F_freeCompressionContext(ctx); + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); return outFileSize; } #endif - static unsigned long long FIO_compressZstdFrame(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, @@ -1341,8 +1335,8 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, int compressionLevel, U64* readsize) { cRess_t const ress = *ressPtr; - FILE* const srcFile = ress.srcFile; - FILE* const dstFile = ress.dstFile; + IOJob_t *writeJob = AIO_WritePool_acquireJob(ressPtr->writeCtx); + U64 compressedfilesize = 0; ZSTD_EndDirective directive = ZSTD_e_continue; U64 pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; @@ -1356,6 +1350,9 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, unsigned inputPresented = 0; unsigned inputBlocked = 0; unsigned lastJobID = 0; + UTIL_time_t lastAdaptTime = UTIL_getTime(); + U64 const adaptEveryMicro = REFRESH_RATE; + UTIL_HumanReadableSize_t const file_hrs = UTIL_makeHumanReadableSize(fileSize); DISPLAYLEVEL(6, "compression using zstd format \n"); @@ -1375,8 +1372,13 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, UTIL_HumanReadableSize_t windowSize; CHECK(ZSTD_CCtx_getParameter(ress.cctx, ZSTD_c_windowLog, &windowLog)); if (windowLog == 0) { - const ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, fileSize, 0); - windowLog = cParams.windowLog; + if (prefs->ldmFlag) { + /* If long mode is set without a window size libzstd will set this size internally */ + windowLog = ZSTD_WINDOWLOG_LIMIT_DEFAULT; + } else { + const ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, fileSize, 0); + windowLog = (int)cParams.windowLog; + } } windowSize = UTIL_makeHumanReadableSize(MAX(1ULL, MIN(1ULL << windowLog, pledgedSrcSize))); DISPLAYLEVEL(4, "Decompression will require %.*f%s of memory\n", windowSize.precision, windowSize.value, windowSize.suffix); @@ -1387,12 +1389,12 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, do { size_t stillToFlush; /* Fill input Buffer */ - size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile); - ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 }; + size_t const inSize = AIO_ReadPool_fillBuffer(ress.readCtx, ZSTD_CStreamInSize()); + ZSTD_inBuffer inBuff = setInBuffer( ress.readCtx->srcBuffer, ress.readCtx->srcBufferLoaded, 0 ); DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize); *readsize += inSize; - if ((inSize == 0) || (*readsize == fileSize)) + if ((ress.readCtx->srcBufferLoaded == 0) || (*readsize == fileSize)) directive = ZSTD_e_end; stillToFlush = 1; @@ -1400,9 +1402,10 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, || (directive == ZSTD_e_end && stillToFlush != 0) ) { size_t const oldIPos = inBuff.pos; - ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 }; + ZSTD_outBuffer outBuff = setOutBuffer( writeJob->buffer, writeJob->bufferSize, 0 ); size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx); CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive)); + AIO_ReadPool_consumeBytes(ress.readCtx, inBuff.pos - oldIPos); /* count stats */ inputPresented++; @@ -1411,151 +1414,155 @@ FIO_compressZstdFrame(FIO_ctx_t* const fCtx, /* Write compressed stream */ DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n", - (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos); + (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos); if (outBuff.pos) { - size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile); - if (sizeCheck != outBuff.pos) - EXM_THROW(25, "Write error : %s (cannot write compressed block)", - strerror(errno)); + writeJob->usedBufferSize = outBuff.pos; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); compressedfilesize += outBuff.pos; } - /* display notification; and adapt compression level */ - if (READY_FOR_UPDATE()) { + /* adaptive mode : statistics measurement and speed correction */ + if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) { + ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx); + + lastAdaptTime = UTIL_getTime(); + + /* check output speed */ + if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */ + + unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced; + unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed; + assert(zfp.produced >= previous_zfp_update.produced); + assert(prefs->nbWorkers >= 1); + + /* test if compression is blocked + * either because output is slow and all buffers are full + * or because input is slow and no job can start while waiting for at least one buffer to be filled. + * note : exclude starting part, since currentJobID > 1 */ + if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/ + && (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */ + ) { + DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n") + speedChange = slower; + } + + previous_zfp_update = zfp; + + if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */ + && (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */ + ) { + DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed); + speedChange = slower; + } + flushWaiting = 0; + } + + /* course correct only if there is at least one new job completed */ + if (zfp.currentJobID > lastJobID) { + DISPLAYLEVEL(6, "compression level adaptation check \n") + + /* check input speed */ + if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */ + if (inputBlocked <= 0) { + DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n"); + speedChange = slower; + } else if (speedChange == noChange) { + unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested; + unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed; + unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced; + unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed; + previous_zfp_correction = zfp; + assert(inputPresented > 0); + DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n", + inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100, + (unsigned)newlyIngested, (unsigned)newlyConsumed, + (unsigned)newlyFlushed, (unsigned)newlyProduced); + if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full : compression or output too slow */ + && (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */ + && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */ + ) { + DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n", + newlyIngested, newlyConsumed, newlyProduced, newlyFlushed); + speedChange = faster; + } + } + inputBlocked = 0; + inputPresented = 0; + } + + if (speedChange == slower) { + DISPLAYLEVEL(6, "slower speed , higher compression \n") + compressionLevel ++; + if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel(); + if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel; + compressionLevel += (compressionLevel == 0); /* skip 0 */ + ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); + } + if (speedChange == faster) { + DISPLAYLEVEL(6, "faster speed , lighter compression \n") + compressionLevel --; + if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel; + compressionLevel -= (compressionLevel == 0); /* skip 0 */ + ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); + } + speedChange = noChange; + + lastJobID = zfp.currentJobID; + } /* if (zfp.currentJobID > lastJobID) */ + } /* if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) */ + + /* display notification */ + if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) { ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx); double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100; UTIL_HumanReadableSize_t const buffered_hrs = UTIL_makeHumanReadableSize(zfp.ingested - zfp.consumed); UTIL_HumanReadableSize_t const consumed_hrs = UTIL_makeHumanReadableSize(zfp.consumed); UTIL_HumanReadableSize_t const produced_hrs = UTIL_makeHumanReadableSize(zfp.produced); + DELAY_NEXT_UPDATE(); + /* display progress notifications */ + DISPLAY_PROGRESS("\r%79s\r", ""); /* Clear out the current displayed line */ if (g_display_prefs.displayLevel >= 3) { - DISPLAYUPDATE(3, "\r(L%i) Buffered :%6.*f%4s - Consumed :%6.*f%4s - Compressed :%6.*f%4s => %.2f%% ", - compressionLevel, - buffered_hrs.precision, buffered_hrs.value, buffered_hrs.suffix, - consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix, - produced_hrs.precision, produced_hrs.value, produced_hrs.suffix, - cShare ); - } else if (g_display_prefs.displayLevel >= 2 || g_display_prefs.progressSetting == FIO_ps_always) { + /* Verbose progress update */ + DISPLAY_PROGRESS( + "(L%i) Buffered:%5.*f%s - Consumed:%5.*f%s - Compressed:%5.*f%s => %.2f%% ", + compressionLevel, + buffered_hrs.precision, buffered_hrs.value, buffered_hrs.suffix, + consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix, + produced_hrs.precision, produced_hrs.value, produced_hrs.suffix, + cShare ); + } else { /* Require level 2 or forcibly displayed progress counter for summarized updates */ - DISPLAYLEVEL(1, "\r%79s\r", ""); /* Clear out the current displayed line */ if (fCtx->nbFilesTotal > 1) { size_t srcFileNameSize = strlen(srcFileName); /* Ensure that the string we print is roughly the same size each time */ if (srcFileNameSize > 18) { const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15; - DISPLAYLEVEL(1, "Compress: %u/%u files. Current: ...%s ", + DISPLAY_PROGRESS("Compress: %u/%u files. Current: ...%s ", fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName); } else { - DISPLAYLEVEL(1, "Compress: %u/%u files. Current: %*s ", + DISPLAY_PROGRESS("Compress: %u/%u files. Current: %*s ", fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName); } } - DISPLAYLEVEL(1, "Read:%6.*f%4s ", consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix); + DISPLAY_PROGRESS("Read:%6.*f%4s ", consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix); if (fileSize != UTIL_FILESIZE_UNKNOWN) - DISPLAYLEVEL(2, "/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix); - DISPLAYLEVEL(1, " ==> %2.f%%", cShare); - DELAY_NEXT_UPDATE(); + DISPLAY_PROGRESS("/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix); + DISPLAY_PROGRESS(" ==> %2.f%%", cShare); } - - /* adaptive mode : statistics measurement and speed correction */ - if (prefs->adaptiveMode) { - - /* check output speed */ - if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */ - - unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced; - unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed; - assert(zfp.produced >= previous_zfp_update.produced); - assert(prefs->nbWorkers >= 1); - - /* test if compression is blocked - * either because output is slow and all buffers are full - * or because input is slow and no job can start while waiting for at least one buffer to be filled. - * note : exclude starting part, since currentJobID > 1 */ - if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/ - && (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */ - ) { - DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n") - speedChange = slower; - } - - previous_zfp_update = zfp; - - if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */ - && (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */ - ) { - DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed); - speedChange = slower; - } - flushWaiting = 0; - } - - /* course correct only if there is at least one new job completed */ - if (zfp.currentJobID > lastJobID) { - DISPLAYLEVEL(6, "compression level adaptation check \n") - - /* check input speed */ - if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */ - if (inputBlocked <= 0) { - DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n"); - speedChange = slower; - } else if (speedChange == noChange) { - unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested; - unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed; - unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced; - unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed; - previous_zfp_correction = zfp; - assert(inputPresented > 0); - DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n", - inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100, - (unsigned)newlyIngested, (unsigned)newlyConsumed, - (unsigned)newlyFlushed, (unsigned)newlyProduced); - if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full : compression or output too slow */ - && (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */ - && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */ - ) { - DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n", - newlyIngested, newlyConsumed, newlyProduced, newlyFlushed); - speedChange = faster; - } - } - inputBlocked = 0; - inputPresented = 0; - } - - if (speedChange == slower) { - DISPLAYLEVEL(6, "slower speed , higher compression \n") - compressionLevel ++; - if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel(); - if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel; - compressionLevel += (compressionLevel == 0); /* skip 0 */ - ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); - } - if (speedChange == faster) { - DISPLAYLEVEL(6, "faster speed , lighter compression \n") - compressionLevel --; - if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel; - compressionLevel -= (compressionLevel == 0); /* skip 0 */ - ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); - } - speedChange = noChange; - - lastJobID = zfp.currentJobID; - } /* if (zfp.currentJobID > lastJobID) */ - } /* if (g_adaptiveMode) */ - } /* if (READY_FOR_UPDATE()) */ + } /* if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) */ } /* while ((inBuff.pos != inBuff.size) */ } while (directive != ZSTD_e_end); - if (ferror(srcFile)) { - EXM_THROW(26, "Read error : I/O error"); - } if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) { EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B", (unsigned long long)*readsize, (unsigned long long)fileSize); } + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ressPtr->writeCtx); + return compressedfilesize; } @@ -1620,20 +1627,18 @@ FIO_compressFilename_internal(FIO_ctx_t* const fCtx, /* Status */ fCtx->totalBytesInput += (size_t)readsize; fCtx->totalBytesOutput += (size_t)compressedfilesize; - DISPLAYLEVEL(2, "\r%79s\r", ""); - if (g_display_prefs.displayLevel >= 2 && - !fCtx->hasStdoutOutput && - (g_display_prefs.displayLevel >= 3 || fCtx->nbFilesTotal <= 1)) { + DISPLAY_PROGRESS("\r%79s\r", ""); + if (FIO_shouldDisplayFileSummary(fCtx)) { UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) readsize); UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) compressedfilesize); if (readsize == 0) { - DISPLAYLEVEL(2,"%-20s : (%6.*f%4s => %6.*f%4s, %s) \n", + DISPLAY_SUMMARY("%-20s : (%6.*f%s => %6.*f%s, %s) \n", srcFileName, hr_isize.precision, hr_isize.value, hr_isize.suffix, hr_osize.precision, hr_osize.value, hr_osize.suffix, dstFileName); } else { - DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6.*f%4s => %6.*f%4s, %s) \n", + DISPLAY_SUMMARY("%-20s :%6.2f%% (%6.*f%s => %6.*f%s, %s) \n", srcFileName, (double)compressedfilesize / (double)readsize * 100, hr_isize.precision, hr_isize.value, hr_isize.suffix, @@ -1656,7 +1661,7 @@ FIO_compressFilename_internal(FIO_ctx_t* const fCtx, /*! FIO_compressFilename_dstFile() : - * open dstFileName, or pass-through if ress.dstFile != NULL, + * open dstFileName, or pass-through if ress.file != NULL, * then start compression with FIO_compressFilename_internal(). * Manages source removal (--rm) and file permissions transfer. * note : ress.srcFile must be != NULL, @@ -1669,27 +1674,29 @@ static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx, cRess_t ress, const char* dstFileName, const char* srcFileName, + const stat_t* srcFileStat, int compressionLevel) { int closeDstFile = 0; int result; - stat_t statbuf; - int transferMTime = 0; - assert(ress.srcFile != NULL); - if (ress.dstFile == NULL) { - int dstFilePermissions = DEFAULT_FILE_PERMISSIONS; + int transferStat = 0; + FILE *dstFile; + + assert(AIO_ReadPool_getFile(ress.readCtx) != NULL); + if (AIO_WritePool_getFile(ress.writeCtx) == NULL) { + int dstFileInitialPermissions = DEFAULT_FILE_PERMISSIONS; if ( strcmp (srcFileName, stdinmark) && strcmp (dstFileName, stdoutmark) - && UTIL_stat(srcFileName, &statbuf) - && UTIL_isRegularFileStat(&statbuf) ) { - dstFilePermissions = statbuf.st_mode; - transferMTime = 1; + && UTIL_isRegularFileStat(srcFileStat) ) { + transferStat = 1; + dstFileInitialPermissions = TEMPORARY_FILE_PERMISSIONS; } closeDstFile = 1; DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName); - ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions); - if (ress.dstFile==NULL) return 1; /* could not open dstFileName */ + dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFileInitialPermissions); + if (dstFile==NULL) return 1; /* could not open dstFileName */ + AIO_WritePool_setFile(ress.writeCtx, dstFile); /* Must only be added after FIO_openDstFile() succeeds. * Otherwise we may delete the destination file if it already exists, * and the user presses Ctrl-C when asked if they wish to overwrite. @@ -1700,18 +1707,15 @@ static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx, result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); if (closeDstFile) { - FILE* const dstFile = ress.dstFile; - ress.dstFile = NULL; - clearHandler(); DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName); - if (fclose(dstFile)) { /* error closing dstFile */ + if (AIO_WritePool_closeFile(ress.writeCtx)) { /* error closing file */ DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); result=1; } - if (transferMTime) { - UTIL_utime(dstFileName, &statbuf); + if (transferStat) { + UTIL_setFileStat(dstFileName, srcFileStat); } if ( (result != 0) /* operation failure */ && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */ @@ -1752,18 +1756,27 @@ FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx, int compressionLevel) { int result; + FILE* srcFile; + stat_t srcFileStat; + U64 fileSize = UTIL_FILESIZE_UNKNOWN; DISPLAYLEVEL(6, "FIO_compressFilename_srcFile: %s \n", srcFileName); - /* ensure src is not a directory */ - if (UTIL_isDirectory(srcFileName)) { - DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); - return 1; - } + if (strcmp(srcFileName, stdinmark)) { + if (UTIL_stat(srcFileName, &srcFileStat)) { + /* failure to stat at all is handled during opening */ - /* ensure src is not the same as dict (if present) */ - if (ress.dictFileName != NULL && UTIL_isSameFile(srcFileName, ress.dictFileName)) { - DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName); - return 1; + /* ensure src is not a directory */ + if (UTIL_isDirectoryStat(&srcFileStat)) { + DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); + return 1; + } + + /* ensure src is not the same as dict (if present) */ + if (ress.dictFileName != NULL && UTIL_isSameFileStat(srcFileName, ress.dictFileName, &srcFileStat, &ress.dictFileStat)) { + DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName); + return 1; + } + } } /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used @@ -1775,16 +1788,30 @@ FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx, return 0; } - ress.srcFile = FIO_openSrcFile(prefs, srcFileName); - if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */ + srcFile = FIO_openSrcFile(prefs, srcFileName, &srcFileStat); + if (srcFile == NULL) return 1; /* srcFile could not be opened */ + + /* Don't use AsyncIO for small files */ + if (strcmp(srcFileName, stdinmark)) /* Stdin doesn't have stats */ + fileSize = UTIL_getFileSizeStat(&srcFileStat); + if(fileSize != UTIL_FILESIZE_UNKNOWN && fileSize < ZSTD_BLOCKSIZE_MAX * 3) { + AIO_ReadPool_setAsync(ress.readCtx, 0); + AIO_WritePool_setAsync(ress.writeCtx, 0); + } else { + AIO_ReadPool_setAsync(ress.readCtx, 1); + AIO_WritePool_setAsync(ress.writeCtx, 1); + } - result = FIO_compressFilename_dstFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); + AIO_ReadPool_setFile(ress.readCtx, srcFile); + result = FIO_compressFilename_dstFile( + fCtx, prefs, ress, + dstFileName, srcFileName, + &srcFileStat, compressionLevel); + AIO_ReadPool_closeFile(ress.readCtx); - fclose(ress.srcFile); - ress.srcFile = NULL; - if ( prefs->removeSrcFile /* --rm */ - && result == 0 /* success */ - && strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */ + if ( prefs->removeSrcFile /* --rm */ + && result == 0 /* success */ + && strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */ ) { /* We must clear the handler, since after this point calling it would * delete both the source and destination files. @@ -1796,16 +1823,18 @@ FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx, return result; } -static const char* checked_index(const char* options[], size_t length, size_t index) { +static const char* +checked_index(const char* options[], size_t length, size_t index) { assert(index < length); - // Necessary to avoid warnings since -O3 will omit the above `assert` + /* Necessary to avoid warnings since -O3 will omit the above `assert` */ (void) length; return options[index]; } -#define INDEX(options, index) checked_index((options), sizeof(options) / sizeof(char*), (index)) +#define INDEX(options, index) checked_index((options), sizeof(options) / sizeof(char*), (size_t)(index)) -void FIO_displayCompressionParameters(const FIO_prefs_t* prefs) { +void FIO_displayCompressionParameters(const FIO_prefs_t* prefs) +{ static const char* formatOptions[5] = {ZSTD_EXTENSION, GZ_EXTENSION, XZ_EXTENSION, LZMA_EXTENSION, LZ4_EXTENSION}; static const char* sparseOptions[3] = {" --no-sparse", "", " --sparse"}; @@ -1866,6 +1895,11 @@ FIO_determineCompressedName(const char* srcFileName, const char* outDirName, con char* outDirFilename = NULL; size_t sfnSize = strlen(srcFileName); size_t const srcSuffixLen = strlen(suffix); + + if(!strcmp(srcFileName, stdinmark)) { + return stdoutmark; + } + if (outDirName) { outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen); sfnSize = strlen(outDirFilename); @@ -1928,23 +1962,24 @@ int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, /* init */ assert(outFileName != NULL || suffix != NULL); if (outFileName != NULL) { /* output into a single destination (stdout typically) */ - if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) { + FILE *dstFile; + if (FIO_multiFilesConcatWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) { FIO_freeCResources(&ress); return 1; } - ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS); - if (ress.dstFile == NULL) { /* could not open outFileName */ + dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS); + if (dstFile == NULL) { /* could not open outFileName */ error = 1; } else { + AIO_WritePool_setFile(ress.writeCtx, dstFile); for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) { status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel); if (!status) fCtx->nbFilesProcessed++; error |= status; } - if (fclose(ress.dstFile)) + if (AIO_WritePool_closeFile(ress.writeCtx)) EXM_THROW(29, "Write error (%s) : cannot properly close %s", strerror(errno), outFileName); - ress.dstFile = NULL; } } else { if (outMirroredRootDirName) @@ -1975,16 +2010,23 @@ int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal); } - if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesInput != 0) { + if (FIO_shouldDisplayMultipleFileSummary(fCtx)) { UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesInput); UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesOutput); - DISPLAYLEVEL(2, "\r%79s\r", ""); - DISPLAYLEVEL(2, "%3d files compressed :%.2f%% (%6.*f%4s => %6.*f%4s)\n", - fCtx->nbFilesProcessed, - (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100, - hr_isize.precision, hr_isize.value, hr_isize.suffix, - hr_osize.precision, hr_osize.value, hr_osize.suffix); + DISPLAY_PROGRESS("\r%79s\r", ""); + if (fCtx->totalBytesInput == 0) { + DISPLAY_SUMMARY("%3d files compressed : (%6.*f%4s => %6.*f%4s)\n", + fCtx->nbFilesProcessed, + hr_isize.precision, hr_isize.value, hr_isize.suffix, + hr_osize.precision, hr_osize.value, hr_osize.suffix); + } else { + DISPLAY_SUMMARY("%3d files compressed : %.2f%% (%6.*f%4s => %6.*f%4s)\n", + fCtx->nbFilesProcessed, + (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100, + hr_isize.precision, hr_isize.value, hr_isize.suffix, + hr_osize.precision, hr_osize.value, hr_osize.suffix); + } } FIO_freeCResources(&ress); @@ -2001,13 +2043,9 @@ int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, * Decompression ***************************************************************************/ typedef struct { - void* srcBuffer; - size_t srcBufferSize; - size_t srcBufferLoaded; - void* dstBuffer; - size_t dstBufferSize; ZSTD_DStream* dctx; - FILE* dstFile; + WritePoolCtx_t *writeCtx; + ReadPoolCtx_t *readCtx; } dRess_t; static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName) @@ -2025,159 +2063,49 @@ static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFi CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) ); CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag)); - ress.srcBufferSize = ZSTD_DStreamInSize(); - ress.srcBuffer = malloc(ress.srcBufferSize); - ress.dstBufferSize = ZSTD_DStreamOutSize(); - ress.dstBuffer = malloc(ress.dstBufferSize); - if (!ress.srcBuffer || !ress.dstBuffer) - EXM_THROW(61, "Allocation error : not enough memory"); - /* dictionary */ { void* dictBuffer; - size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs); - CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) ); + stat_t statbuf; + size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs, &statbuf); + CHECK( ZSTD_DCtx_reset(ress.dctx, ZSTD_reset_session_only) ); + CHECK( ZSTD_DCtx_loadDictionary(ress.dctx, dictBuffer, dictBufferSize) ); free(dictBuffer); } + ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_DStreamOutSize()); + ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_DStreamInSize()); + return ress; } static void FIO_freeDResources(dRess_t ress) { CHECK( ZSTD_freeDStream(ress.dctx) ); - free(ress.srcBuffer); - free(ress.dstBuffer); + AIO_WritePool_free(ress.writeCtx); + AIO_ReadPool_free(ress.readCtx); } - -/** FIO_fwriteSparse() : -* @return : storedSkips, -* argument for next call to FIO_fwriteSparse() or FIO_fwriteSparseEnd() */ -static unsigned -FIO_fwriteSparse(FILE* file, - const void* buffer, size_t bufferSize, - const FIO_prefs_t* const prefs, - unsigned storedSkips) -{ - const size_t* const bufferT = (const size_t*)buffer; /* Buffer is supposed malloc'ed, hence aligned on size_t */ - size_t bufferSizeT = bufferSize / sizeof(size_t); - const size_t* const bufferTEnd = bufferT + bufferSizeT; - const size_t* ptrT = bufferT; - static const size_t segmentSizeT = (32 KB) / sizeof(size_t); /* check every 32 KB */ - - if (prefs->testMode) return 0; /* do not output anything in test mode */ - - if (!prefs->sparseFileSupport) { /* normal write */ - size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file); - if (sizeCheck != bufferSize) - EXM_THROW(70, "Write error : cannot write decoded block : %s", - strerror(errno)); - return 0; - } - - /* avoid int overflow */ - if (storedSkips > 1 GB) { - if (LONG_SEEK(file, 1 GB, SEEK_CUR) != 0) - EXM_THROW(91, "1 GB skip error (sparse file support)"); - storedSkips -= 1 GB; - } - - while (ptrT < bufferTEnd) { - size_t nb0T; - - /* adjust last segment if < 32 KB */ - size_t seg0SizeT = segmentSizeT; - if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT; - bufferSizeT -= seg0SizeT; - - /* count leading zeroes */ - for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ; - storedSkips += (unsigned)(nb0T * sizeof(size_t)); - - if (nb0T != seg0SizeT) { /* not all 0s */ - size_t const nbNon0ST = seg0SizeT - nb0T; - /* skip leading zeros */ - if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0) - EXM_THROW(92, "Sparse skip error ; try --no-sparse"); - storedSkips = 0; - /* write the rest */ - if (fwrite(ptrT + nb0T, sizeof(size_t), nbNon0ST, file) != nbNon0ST) - EXM_THROW(93, "Write error : cannot write decoded block : %s", - strerror(errno)); - } - ptrT += seg0SizeT; - } - - { static size_t const maskT = sizeof(size_t)-1; - if (bufferSize & maskT) { - /* size not multiple of sizeof(size_t) : implies end of block */ - const char* const restStart = (const char*)bufferTEnd; - const char* restPtr = restStart; - const char* const restEnd = (const char*)buffer + bufferSize; - assert(restEnd > restStart && restEnd < restStart + sizeof(size_t)); - for ( ; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ; - storedSkips += (unsigned) (restPtr - restStart); - if (restPtr != restEnd) { - /* not all remaining bytes are 0 */ - size_t const restSize = (size_t)(restEnd - restPtr); - if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0) - EXM_THROW(92, "Sparse skip error ; try --no-sparse"); - if (fwrite(restPtr, 1, restSize, file) != restSize) - EXM_THROW(95, "Write error : cannot write end of decoded block : %s", - strerror(errno)); - storedSkips = 0; - } } } - - return storedSkips; -} - -static void -FIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips) -{ - if (prefs->testMode) assert(storedSkips == 0); - if (storedSkips>0) { - assert(prefs->sparseFileSupport > 0); /* storedSkips>0 implies sparse support is enabled */ - (void)prefs; /* assert can be disabled, in which case prefs becomes unused */ - if (LONG_SEEK(file, storedSkips-1, SEEK_CUR) != 0) - EXM_THROW(69, "Final skip error (sparse file support)"); - /* last zero must be explicitly written, - * so that skipped ones get implicitly translated as zero by FS */ - { const char lastZeroByte[1] = { 0 }; - if (fwrite(lastZeroByte, 1, 1, file) != 1) - EXM_THROW(69, "Write error : cannot write last zero : %s", strerror(errno)); - } } -} - - /** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode @return : 0 (no error) */ -static int FIO_passThrough(const FIO_prefs_t* const prefs, - FILE* foutput, FILE* finput, - void* buffer, size_t bufferSize, - size_t alreadyLoaded) -{ - size_t const blockSize = MIN(64 KB, bufferSize); - size_t readFromInput; - unsigned storedSkips = 0; - - /* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */ - { size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput); - if (sizeCheck != alreadyLoaded) { - DISPLAYLEVEL(1, "Pass-through write error : %s\n", strerror(errno)); - return 1; - } } - - do { - readFromInput = fread(buffer, 1, blockSize, finput); - storedSkips = FIO_fwriteSparse(foutput, buffer, readFromInput, prefs, storedSkips); - } while (readFromInput == blockSize); - if (ferror(finput)) { - DISPLAYLEVEL(1, "Pass-through read error : %s\n", strerror(errno)); - return 1; - } - assert(feof(finput)); - - FIO_fwriteSparseEnd(prefs, foutput, storedSkips); +static int FIO_passThrough(dRess_t *ress) +{ + size_t const blockSize = MIN(MIN(64 KB, ZSTD_DStreamInSize()), ZSTD_DStreamOutSize()); + IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx); + AIO_ReadPool_fillBuffer(ress->readCtx, blockSize); + + while(ress->readCtx->srcBufferLoaded) { + size_t writeSize; + writeSize = MIN(blockSize, ress->readCtx->srcBufferLoaded); + assert(writeSize <= writeJob->bufferSize); + memcpy(writeJob->buffer, ress->readCtx->srcBuffer, writeSize); + writeJob->usedBufferSize = writeSize; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); + AIO_ReadPool_consumeBytes(ress->readCtx, writeSize); + AIO_ReadPool_fillBuffer(ress->readCtx, blockSize); + } + assert(ress->readCtx->reachedEof); + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); return 0; } @@ -2195,7 +2123,7 @@ FIO_zstdErrorHelp(const FIO_prefs_t* const prefs, return; /* Try to decode the frame header */ - err = ZSTD_getFrameHeader(&header, ress->srcBuffer, ress->srcBufferLoaded); + err = ZSTD_getFrameHeader(&header, ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded); if (err == 0) { unsigned long long const windowSize = header.windowSize; unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0); @@ -2218,13 +2146,13 @@ FIO_zstdErrorHelp(const FIO_prefs_t* const prefs, */ #define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2)) static unsigned long long -FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput, +FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, const FIO_prefs_t* const prefs, const char* srcFileName, U64 alreadyDecoded) /* for multi-frames streams */ { U64 frameSize = 0; - U32 storedSkips = 0; + IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx); /* display last 20 characters only */ { size_t const srcFileLength = strlen(srcFileName); @@ -2234,67 +2162,60 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput, ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only); /* Header loading : ensures ZSTD_getFrameHeader() will succeed */ - { size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX; - if (ress->srcBufferLoaded < toDecode) { - size_t const toRead = toDecode - ress->srcBufferLoaded; - void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded; - ress->srcBufferLoaded += fread(startPosition, 1, toRead, finput); - } } + AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_FRAMEHEADERSIZE_MAX); /* Main decompression Loop */ while (1) { - ZSTD_inBuffer inBuff = { ress->srcBuffer, ress->srcBufferLoaded, 0 }; - ZSTD_outBuffer outBuff= { ress->dstBuffer, ress->dstBufferSize, 0 }; + ZSTD_inBuffer inBuff = setInBuffer( ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded, 0 ); + ZSTD_outBuffer outBuff= setOutBuffer( writeJob->buffer, writeJob->bufferSize, 0 ); size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff); - const int displayLevel = (g_display_prefs.progressSetting == FIO_ps_always) ? 1 : 2; UTIL_HumanReadableSize_t const hrs = UTIL_makeHumanReadableSize(alreadyDecoded+frameSize); if (ZSTD_isError(readSizeHint)) { DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n", srcFileName, ZSTD_getErrorName(readSizeHint)); FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName); + AIO_WritePool_releaseIoJob(writeJob); return FIO_ERROR_FRAME_DECODING; } /* Write block */ - storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, prefs, storedSkips); + writeJob->usedBufferSize = outBuff.pos; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); frameSize += outBuff.pos; if (fCtx->nbFilesTotal > 1) { size_t srcFileNameSize = strlen(srcFileName); if (srcFileNameSize > 18) { const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15; - DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s... ", - fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix); + DISPLAYUPDATE_PROGRESS( + "\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s... ", + fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix); } else { - DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: %s : %.*f%s... ", + DISPLAYUPDATE_PROGRESS("\rDecompress: %2u/%2u files. Current: %s : %.*f%s... ", fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, hrs.precision, hrs.value, hrs.suffix); } } else { - DISPLAYUPDATE(displayLevel, "\r%-20.20s : %.*f%s... ", + DISPLAYUPDATE_PROGRESS("\r%-20.20s : %.*f%s... ", srcFileName, hrs.precision, hrs.value, hrs.suffix); } - if (inBuff.pos > 0) { - memmove(ress->srcBuffer, (char*)ress->srcBuffer + inBuff.pos, inBuff.size - inBuff.pos); - ress->srcBufferLoaded -= inBuff.pos; - } + AIO_ReadPool_consumeBytes(ress->readCtx, inBuff.pos); if (readSizeHint == 0) break; /* end of frame */ /* Fill input buffer */ - { size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize); /* support large skippable frames */ - if (ress->srcBufferLoaded < toDecode) { - size_t const toRead = toDecode - ress->srcBufferLoaded; /* > 0 */ - void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded; - size_t const readSize = fread(startPosition, 1, toRead, finput); + { size_t const toDecode = MIN(readSizeHint, ZSTD_DStreamInSize()); /* support large skippable frames */ + if (ress->readCtx->srcBufferLoaded < toDecode) { + size_t const readSize = AIO_ReadPool_fillBuffer(ress->readCtx, toDecode); if (readSize==0) { DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n", - srcFileName); + srcFileName); + AIO_WritePool_releaseIoJob(writeJob); return FIO_ERROR_FRAME_DECODING; } - ress->srcBufferLoaded += readSize; - } } } + } } } - FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips); + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); return frameSize; } @@ -2302,37 +2223,36 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput, #ifdef ZSTD_GZDECOMPRESS static unsigned long long -FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, - const FIO_prefs_t* const prefs, - const char* srcFileName) +FIO_decompressGzFrame(dRess_t* ress, const char* srcFileName) { unsigned long long outFileSize = 0; z_stream strm; int flush = Z_NO_FLUSH; int decodingError = 0; - unsigned storedSkips = 0; + IOJob_t *writeJob = NULL; strm.zalloc = Z_NULL; strm.zfree = Z_NULL; strm.opaque = Z_NULL; strm.next_in = 0; strm.avail_in = 0; - /* see http://www.zlib.net/manual.html */ + /* see https://www.zlib.net/manual.html */ if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK) return FIO_ERROR_FRAME_DECODING; - strm.next_out = (Bytef*)ress->dstBuffer; - strm.avail_out = (uInt)ress->dstBufferSize; - strm.avail_in = (uInt)ress->srcBufferLoaded; - strm.next_in = (z_const unsigned char*)ress->srcBuffer; + writeJob = AIO_WritePool_acquireJob(ress->writeCtx); + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; + strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded; + strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer; for ( ; ; ) { int ret; if (strm.avail_in == 0) { - ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile); - if (ress->srcBufferLoaded == 0) flush = Z_FINISH; - strm.next_in = (z_const unsigned char*)ress->srcBuffer; - strm.avail_in = (uInt)ress->srcBufferLoaded; + AIO_ReadPool_consumeAndRefill(ress->readCtx); + if (ress->readCtx->srcBufferLoaded == 0) flush = Z_FINISH; + strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer; + strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded; } ret = inflate(&strm, flush); if (ret == Z_BUF_ERROR) { @@ -2343,35 +2263,34 @@ FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret); decodingError = 1; break; } - { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; + { size_t const decompBytes = writeJob->bufferSize - strm.avail_out; if (decompBytes) { - storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decompBytes, prefs, storedSkips); + writeJob->usedBufferSize = decompBytes; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); outFileSize += decompBytes; - strm.next_out = (Bytef*)ress->dstBuffer; - strm.avail_out = (uInt)ress->dstBufferSize; + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; } } if (ret == Z_STREAM_END) break; } - if (strm.avail_in > 0) - memmove(ress->srcBuffer, strm.next_in, strm.avail_in); - ress->srcBufferLoaded = strm.avail_in; + AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in); + if ( (inflateEnd(&strm) != Z_OK) /* release resources ; error detected */ && (decodingError==0) ) { DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName); decodingError = 1; } - FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips); + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize; } #endif - #ifdef ZSTD_LZMADECOMPRESS static unsigned long long -FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, - const FIO_prefs_t* const prefs, +FIO_decompressLzmaFrame(dRess_t* ress, const char* srcFileName, int plain_lzma) { unsigned long long outFileSize = 0; @@ -2379,7 +2298,7 @@ FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, lzma_action action = LZMA_RUN; lzma_ret initRet; int decodingError = 0; - unsigned storedSkips = 0; + IOJob_t *writeJob = NULL; strm.next_in = 0; strm.avail_in = 0; @@ -2396,18 +2315,19 @@ FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, return FIO_ERROR_FRAME_DECODING; } - strm.next_out = (BYTE*)ress->dstBuffer; - strm.avail_out = ress->dstBufferSize; - strm.next_in = (BYTE const*)ress->srcBuffer; - strm.avail_in = ress->srcBufferLoaded; + writeJob = AIO_WritePool_acquireJob(ress->writeCtx); + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = (uInt)writeJob->bufferSize; + strm.next_in = (BYTE const*)ress->readCtx->srcBuffer; + strm.avail_in = ress->readCtx->srcBufferLoaded; for ( ; ; ) { lzma_ret ret; if (strm.avail_in == 0) { - ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile); - if (ress->srcBufferLoaded == 0) action = LZMA_FINISH; - strm.next_in = (BYTE const*)ress->srcBuffer; - strm.avail_in = ress->srcBufferLoaded; + AIO_ReadPool_consumeAndRefill(ress->readCtx); + if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH; + strm.next_in = (BYTE const*)ress->readCtx->srcBuffer; + strm.avail_in = ress->readCtx->srcBufferLoaded; } ret = lzma_code(&strm, action); @@ -2420,104 +2340,90 @@ FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, srcFileName, ret); decodingError = 1; break; } - { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; + { size_t const decompBytes = writeJob->bufferSize - strm.avail_out; if (decompBytes) { - storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decompBytes, prefs, storedSkips); + writeJob->usedBufferSize = decompBytes; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); outFileSize += decompBytes; - strm.next_out = (BYTE*)ress->dstBuffer; - strm.avail_out = ress->dstBufferSize; + strm.next_out = (Bytef*)writeJob->buffer; + strm.avail_out = writeJob->bufferSize; } } if (ret == LZMA_STREAM_END) break; } - if (strm.avail_in > 0) - memmove(ress->srcBuffer, strm.next_in, strm.avail_in); - ress->srcBufferLoaded = strm.avail_in; + AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in); lzma_end(&strm); - FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips); + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize; } #endif #ifdef ZSTD_LZ4DECOMPRESS static unsigned long long -FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile, - const FIO_prefs_t* const prefs, - const char* srcFileName) +FIO_decompressLz4Frame(dRess_t* ress, const char* srcFileName) { unsigned long long filesize = 0; - LZ4F_errorCode_t nextToLoad; + LZ4F_errorCode_t nextToLoad = 4; LZ4F_decompressionContext_t dCtx; LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION); int decodingError = 0; - unsigned storedSkips = 0; + IOJob_t *writeJob = NULL; if (LZ4F_isError(errorCode)) { DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n"); return FIO_ERROR_FRAME_DECODING; } - /* Init feed with magic number (already consumed from FILE* sFile) */ - { size_t inSize = 4; - size_t outSize= 0; - MEM_writeLE32(ress->srcBuffer, LZ4_MAGICNUMBER); - nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &outSize, ress->srcBuffer, &inSize, NULL); - if (LZ4F_isError(nextToLoad)) { - DISPLAYLEVEL(1, "zstd: %s: lz4 header error : %s \n", - srcFileName, LZ4F_getErrorName(nextToLoad)); - LZ4F_freeDecompressionContext(dCtx); - return FIO_ERROR_FRAME_DECODING; - } } + writeJob = AIO_WritePool_acquireJob(ress->writeCtx); /* Main Loop */ for (;nextToLoad;) { - size_t readSize; size_t pos = 0; - size_t decodedBytes = ress->dstBufferSize; + size_t decodedBytes = writeJob->bufferSize; + int fullBufferDecoded = 0; /* Read input */ - if (nextToLoad > ress->srcBufferSize) nextToLoad = ress->srcBufferSize; - readSize = fread(ress->srcBuffer, 1, nextToLoad, srcFile); - if (!readSize) break; /* reached end of file or stream */ + AIO_ReadPool_fillBuffer(ress->readCtx, nextToLoad); + if(!ress->readCtx->srcBufferLoaded) break; /* reached end of file */ - while ((pos < readSize) || (decodedBytes == ress->dstBufferSize)) { /* still to read, or still to flush */ + while ((pos < ress->readCtx->srcBufferLoaded) || fullBufferDecoded) { /* still to read, or still to flush */ /* Decode Input (at least partially) */ - size_t remaining = readSize - pos; - decodedBytes = ress->dstBufferSize; - nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &decodedBytes, (char*)(ress->srcBuffer)+pos, &remaining, NULL); + size_t remaining = ress->readCtx->srcBufferLoaded - pos; + decodedBytes = writeJob->bufferSize; + nextToLoad = LZ4F_decompress(dCtx, writeJob->buffer, &decodedBytes, (char*)(ress->readCtx->srcBuffer)+pos, + &remaining, NULL); if (LZ4F_isError(nextToLoad)) { DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n", srcFileName, LZ4F_getErrorName(nextToLoad)); decodingError = 1; nextToLoad = 0; break; } pos += remaining; + assert(pos <= ress->readCtx->srcBufferLoaded); + fullBufferDecoded = decodedBytes == writeJob->bufferSize; /* Write Block */ if (decodedBytes) { UTIL_HumanReadableSize_t hrs; - storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decodedBytes, prefs, storedSkips); + writeJob->usedBufferSize = decodedBytes; + AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob); filesize += decodedBytes; hrs = UTIL_makeHumanReadableSize(filesize); - DISPLAYUPDATE(2, "\rDecompressed : %.*f%s ", hrs.precision, hrs.value, hrs.suffix); + DISPLAYUPDATE_PROGRESS("\rDecompressed : %.*f%s ", hrs.precision, hrs.value, hrs.suffix); } if (!nextToLoad) break; } + AIO_ReadPool_consumeBytes(ress->readCtx, pos); } - /* can be out because readSize == 0, which could be an fread() error */ - if (ferror(srcFile)) { - DISPLAYLEVEL(1, "zstd: %s: read error \n", srcFileName); - decodingError=1; - } - if (nextToLoad!=0) { DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName); decodingError=1; } LZ4F_freeDecompressionContext(dCtx); - ress->srcBufferLoaded = 0; /* LZ4F will reach exact frame boundary */ - FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips); + AIO_WritePool_releaseIoJob(writeJob); + AIO_WritePool_sparseWriteEnd(ress->writeCtx); return decodingError ? FIO_ERROR_FRAME_DECODING : filesize; } @@ -2532,23 +2438,30 @@ FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile, * 1 : error */ static int FIO_decompressFrames(FIO_ctx_t* const fCtx, - dRess_t ress, FILE* srcFile, - const FIO_prefs_t* const prefs, - const char* dstFileName, const char* srcFileName) + dRess_t ress, const FIO_prefs_t* const prefs, + const char* dstFileName, const char* srcFileName) { unsigned readSomething = 0; unsigned long long filesize = 0; - assert(srcFile != NULL); + int passThrough = prefs->passThrough; + + if (passThrough == -1) { + /* If pass-through mode is not explicitly enabled or disabled, + * default to the legacy behavior of enabling it if we are writing + * to stdout with the overwrite flag enabled. + */ + passThrough = prefs->overwrite && !strcmp(dstFileName, stdoutmark); + } + assert(passThrough == 0 || passThrough == 1); /* for each frame */ for ( ; ; ) { /* check magic number -> version */ size_t const toRead = 4; - const BYTE* const buf = (const BYTE*)ress.srcBuffer; - if (ress.srcBufferLoaded < toRead) /* load up to 4 bytes for header */ - ress.srcBufferLoaded += fread((char*)ress.srcBuffer + ress.srcBufferLoaded, - (size_t)1, toRead - ress.srcBufferLoaded, srcFile); - if (ress.srcBufferLoaded==0) { + const BYTE* buf; + AIO_ReadPool_fillBuffer(ress.readCtx, toRead); + buf = (const BYTE*)ress.readCtx->srcBuffer; + if (ress.readCtx->srcBufferLoaded==0) { if (readSomething==0) { /* srcFile is empty (which is invalid) */ DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName); return 1; @@ -2556,17 +2469,20 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx, break; /* no more input */ } readSomething = 1; /* there is at least 1 byte in srcFile */ - if (ress.srcBufferLoaded < toRead) { + if (ress.readCtx->srcBufferLoaded < toRead) { /* not enough input to check magic number */ + if (passThrough) { + return FIO_passThrough(&ress); + } DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName); return 1; } - if (ZSTD_isFrame(buf, ress.srcBufferLoaded)) { - unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, srcFile, prefs, srcFileName, filesize); + if (ZSTD_isFrame(buf, ress.readCtx->srcBufferLoaded)) { + unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, prefs, srcFileName, filesize); if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; filesize += frameSize; } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */ #ifdef ZSTD_GZDECOMPRESS - unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFile, prefs, srcFileName); + unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFileName); if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; filesize += frameSize; #else @@ -2576,7 +2492,7 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx, } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */ || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */ #ifdef ZSTD_LZMADECOMPRESS - unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFile, prefs, srcFileName, buf[0] != 0xFD); + unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFileName, buf[0] != 0xFD); if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; filesize += frameSize; #else @@ -2585,18 +2501,15 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx, #endif } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) { #ifdef ZSTD_LZ4DECOMPRESS - unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFile, prefs, srcFileName); + unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFileName); if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; filesize += frameSize; #else DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName); return 1; #endif - } else if ((prefs->overwrite) && !strcmp (dstFileName, stdoutmark)) { /* pass-through mode */ - return FIO_passThrough(prefs, - ress.dstFile, srcFile, - ress.srcBuffer, ress.srcBufferSize, - ress.srcBufferLoaded); + } else if (passThrough) { + return FIO_passThrough(&ress); } else { DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName); return 1; @@ -2604,48 +2517,45 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx, /* Final Status */ fCtx->totalBytesOutput += (size_t)filesize; - DISPLAYLEVEL(2, "\r%79s\r", ""); - /* No status message in pipe mode (stdin - stdout) or multi-files mode */ - if ((g_display_prefs.displayLevel >= 2 && fCtx->nbFilesTotal <= 1) || - g_display_prefs.displayLevel >= 3 || - g_display_prefs.progressSetting == FIO_ps_always) { - DISPLAYLEVEL(1, "\r%-20s: %llu bytes \n", srcFileName, filesize); - } + DISPLAY_PROGRESS("\r%79s\r", ""); + if (FIO_shouldDisplayFileSummary(fCtx)) + DISPLAY_SUMMARY("%-20s: %llu bytes \n", srcFileName, filesize); return 0; } /** FIO_decompressDstFile() : - open `dstFileName`, - or path-through if ress.dstFile is already != 0, + open `dstFileName`, or pass-through if writeCtx's file is already != 0, then start decompression process (FIO_decompressFrames()). @return : 0 : OK 1 : operation aborted */ static int FIO_decompressDstFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, - dRess_t ress, FILE* srcFile, - const char* dstFileName, const char* srcFileName) + dRess_t ress, + const char* dstFileName, + const char* srcFileName, + const stat_t* srcFileStat) { int result; - stat_t statbuf; int releaseDstFile = 0; - int transferMTime = 0; + int transferStat = 0; - if ((ress.dstFile == NULL) && (prefs->testMode==0)) { + if ((AIO_WritePool_getFile(ress.writeCtx) == NULL) && (prefs->testMode == 0)) { + FILE *dstFile; int dstFilePermissions = DEFAULT_FILE_PERMISSIONS; if ( strcmp(srcFileName, stdinmark) /* special case : don't transfer permissions from stdin */ && strcmp(dstFileName, stdoutmark) - && UTIL_stat(srcFileName, &statbuf) - && UTIL_isRegularFileStat(&statbuf) ) { - dstFilePermissions = statbuf.st_mode; - transferMTime = 1; + && UTIL_isRegularFileStat(srcFileStat) ) { + transferStat = 1; + dstFilePermissions = TEMPORARY_FILE_PERMISSIONS; } releaseDstFile = 1; - ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions); - if (ress.dstFile==NULL) return 1; + dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions); + if (dstFile==NULL) return 1; + AIO_WritePool_setFile(ress.writeCtx, dstFile); /* Must only be added after FIO_openDstFile() succeeds. * Otherwise we may delete the destination file if it already exists, @@ -2654,19 +2564,17 @@ static int FIO_decompressDstFile(FIO_ctx_t* const fCtx, addHandler(dstFileName); } - result = FIO_decompressFrames(fCtx, ress, srcFile, prefs, dstFileName, srcFileName); + result = FIO_decompressFrames(fCtx, ress, prefs, dstFileName, srcFileName); if (releaseDstFile) { - FILE* const dstFile = ress.dstFile; clearHandler(); - ress.dstFile = NULL; - if (fclose(dstFile)) { + if (AIO_WritePool_closeFile(ress.writeCtx)) { DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); result = 1; } - if (transferMTime) { - UTIL_utime(dstFileName, &statbuf); + if (transferStat) { + UTIL_setFileStat(dstFileName, srcFileStat); } if ( (result != 0) /* operation failure */ @@ -2688,18 +2596,34 @@ static int FIO_decompressDstFile(FIO_ctx_t* const fCtx, static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName) { FILE* srcFile; + stat_t srcFileStat; int result; + U64 fileSize = UTIL_FILESIZE_UNKNOWN; if (UTIL_isDirectory(srcFileName)) { DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); return 1; } - srcFile = FIO_openSrcFile(prefs, srcFileName); + srcFile = FIO_openSrcFile(prefs, srcFileName, &srcFileStat); if (srcFile==NULL) return 1; - ress.srcBufferLoaded = 0; - result = FIO_decompressDstFile(fCtx, prefs, ress, srcFile, dstFileName, srcFileName); + /* Don't use AsyncIO for small files */ + if (strcmp(srcFileName, stdinmark)) /* Stdin doesn't have stats */ + fileSize = UTIL_getFileSizeStat(&srcFileStat); + if(fileSize != UTIL_FILESIZE_UNKNOWN && fileSize < ZSTD_BLOCKSIZE_MAX * 3) { + AIO_ReadPool_setAsync(ress.readCtx, 0); + AIO_WritePool_setAsync(ress.writeCtx, 0); + } else { + AIO_ReadPool_setAsync(ress.readCtx, 1); + AIO_WritePool_setAsync(ress.writeCtx, 1); + } + + AIO_ReadPool_setFile(ress.readCtx, srcFile); + + result = FIO_decompressDstFile(fCtx, prefs, ress, dstFileName, srcFileName, &srcFileStat); + + AIO_ReadPool_setFile(ress.readCtx, NULL); /* Close file */ if (fclose(srcFile)) { @@ -2788,6 +2712,11 @@ FIO_determineDstName(const char* srcFileName, const char* outDirName) size_t srcSuffixLen; const char* const srcSuffix = strrchr(srcFileName, '.'); + + if(!strcmp(srcFileName, stdinmark)) { + return stdoutmark; + } + if (srcSuffix == NULL) { DISPLAYLEVEL(1, "zstd: %s: unknown suffix (%s expected). " @@ -2869,20 +2798,21 @@ FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx, dRess_t ress = FIO_createDResources(prefs, dictFileName); if (outFileName) { - if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) { + if (FIO_multiFilesConcatWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) { FIO_freeDResources(ress); return 1; } if (!prefs->testMode) { - ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS); - if (ress.dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName); + FILE* dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS); + if (dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName); + AIO_WritePool_setFile(ress.writeCtx, dstFile); } for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { status = FIO_decompressSrcFile(fCtx, prefs, ress, outFileName, srcNamesTable[fCtx->currFileIdx]); if (!status) fCtx->nbFilesProcessed++; error |= status; } - if ((!prefs->testMode) && (fclose(ress.dstFile))) + if ((!prefs->testMode) && (AIO_WritePool_closeFile(ress.writeCtx))) EXM_THROW(72, "Write error : %s : cannot properly close output file", strerror(errno)); } else { @@ -2912,8 +2842,11 @@ FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx, FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal); } - if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesOutput != 0) - DISPLAYLEVEL(2, "%d files decompressed : %6zu bytes total \n", fCtx->nbFilesProcessed, fCtx->totalBytesOutput); + if (FIO_shouldDisplayMultipleFileSummary(fCtx)) { + DISPLAY_PROGRESS("\r%79s\r", ""); + DISPLAY_SUMMARY("%d files decompressed : %6llu bytes total \n", + fCtx->nbFilesProcessed, (unsigned long long)fCtx->totalBytesOutput); + } FIO_freeDResources(ress); return error; @@ -2931,7 +2864,9 @@ typedef struct { int numSkippableFrames; int decompUnavailable; int usesCheck; + BYTE checksum[4]; U32 nbFiles; + unsigned dictID; } fileInfo_t; typedef enum { @@ -2939,7 +2874,7 @@ typedef enum { info_frame_error=1, info_not_zstd=2, info_file_error=3, - info_truncated_input=4, + info_truncated_input=4 } InfoError; #define ERROR_IF(c,n,...) { \ @@ -2986,6 +2921,12 @@ FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile) } ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0, info_frame_error, "Error: could not decode frame header"); + if (info->dictID != 0 && info->dictID != header.dictID) { + DISPLAY("WARNING: File contains multiple frames with different dictionary IDs. Showing dictID 0 instead"); + info->dictID = 0; + } else { + info->dictID = header.dictID; + } info->windowSize = header.windowSize; /* move to the end of the frame header */ { size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead); @@ -3018,8 +2959,8 @@ FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile) int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2; if (contentChecksumFlag) { info->usesCheck = 1; - ERROR_IF(fseek(srcFile, 4, SEEK_CUR) != 0, - info_frame_error, "Error: could not skip past checksum"); + ERROR_IF(fread(info->checksum, 1, 4, srcFile) != 4, + info_frame_error, "Error: could not read checksum"); } } info->numActualFrames++; } @@ -3045,10 +2986,11 @@ static InfoError getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName) { InfoError status; - FILE* const srcFile = FIO_openSrcFile(NULL, inFileName); + stat_t srcFileStat; + FILE* const srcFile = FIO_openSrcFile(NULL, inFileName, &srcFileStat); ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName); - info->compressedSize = UTIL_getFileSize(inFileName); + info->compressedSize = UTIL_getFileSizeStat(&srcFileStat); status = FIO_analyzeFrames(info, srcFile); fclose(srcFile); @@ -3098,6 +3040,7 @@ displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel) DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames); if (info->numSkippableFrames) DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames); + DISPLAYOUT("DictID: %u\n", info->dictID); DISPLAYOUT("Window Size: %.*f%s (%llu B)\n", window_hrs.precision, window_hrs.value, window_hrs.suffix, (unsigned long long)info->windowSize); @@ -3110,7 +3053,16 @@ displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel) (unsigned long long)info->decompressedSize); DISPLAYOUT("Ratio: %.4f\n", ratio); } - DISPLAYOUT("Check: %s\n", checkString); + + if (info->usesCheck && info->numActualFrames == 1) { + DISPLAYOUT("Check: %s %02x%02x%02x%02x\n", checkString, + info->checksum[3], info->checksum[2], + info->checksum[1], info->checksum[0] + ); + } else { + DISPLAYOUT("Check: %s\n", checkString); + } + DISPLAYOUT("\n"); } } @@ -3174,7 +3126,7 @@ int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int dis } } if (numFiles == 0) { - if (!IS_CONSOLE(stdin)) { + if (!UTIL_isConsole(stdin)) { DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n"); } DISPLAYLEVEL(1, "No files given \n"); diff --git a/contrib/libs/zstd/programs/fileio.h b/contrib/libs/zstd/programs/fileio.h index 61094db83c..291d4d4145 100644 --- a/contrib/libs/zstd/programs/fileio.h +++ b/contrib/libs/zstd/programs/fileio.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -12,6 +12,8 @@ #ifndef FILEIO_H_23981798732 #define FILEIO_H_23981798732 +#include "fileio_types.h" +#include "util.h" /* FileNamesTable */ #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */ #include "../lib/zstd.h" /* ZSTD_* */ @@ -53,10 +55,6 @@ extern "C" { /*-************************************* * Types ***************************************/ -typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_lzmaCompression, FIO_lz4Compression } FIO_compressionType_t; - -typedef struct FIO_prefs_s FIO_prefs_t; - FIO_prefs_t* FIO_createPreferences(void); void FIO_freePreferences(FIO_prefs_t* const prefs); @@ -66,9 +64,6 @@ typedef struct FIO_ctx_s FIO_ctx_t; FIO_ctx_t* FIO_createContext(void); void FIO_freeContext(FIO_ctx_t* const fCtx); -typedef struct FIO_display_prefs_s FIO_display_prefs_t; - -typedef enum { FIO_ps_auto, FIO_ps_never, FIO_ps_always } FIO_progressSetting_e; /*-************************************* * Parameters @@ -76,7 +71,7 @@ typedef enum { FIO_ps_auto, FIO_ps_never, FIO_ps_always } FIO_progressSetting_e; /* FIO_prefs_t functions */ void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType); void FIO_overwriteMode(FIO_prefs_t* const prefs); -void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt); +void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, int adapt); void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel); void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel); void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder); @@ -91,8 +86,8 @@ void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch); void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit); void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers); void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog); -void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag); -void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ +void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, int flag); +void FIO_setSparseWrite(FIO_prefs_t* const prefs, int sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable); void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize); void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize); @@ -109,6 +104,8 @@ void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices); void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value); void FIO_setContentSize(FIO_prefs_t* const prefs, int value); void FIO_displayCompressionParameters(const FIO_prefs_t* prefs); +void FIO_setAsyncIOFlag(FIO_prefs_t* const prefs, int value); +void FIO_setPassThroughFlag(FIO_prefs_t* const prefs, int value); /* FIO_ctx_t functions */ void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value); @@ -171,6 +168,9 @@ int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles); /* custom crash signal handler */ void FIO_addAbortHandler(void); +char const* FIO_zlibVersion(void); +char const* FIO_lz4Version(void); +char const* FIO_lzmaVersion(void); #if defined (__cplusplus) diff --git a/contrib/libs/zstd/programs/fileio_asyncio.c b/contrib/libs/zstd/programs/fileio_asyncio.c new file mode 100644 index 0000000000..fe9cca95d1 --- /dev/null +++ b/contrib/libs/zstd/programs/fileio_asyncio.c @@ -0,0 +1,663 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "platform.h" +#include <stdio.h> /* fprintf, open, fdopen, fread, _fileno, stdin, stdout */ +#include <stdlib.h> /* malloc, free */ +#include <assert.h> +#include <errno.h> /* errno */ + +#if defined (_MSC_VER) +# include <sys/stat.h> +# include <io.h> +#endif + +#include "fileio_asyncio.h" +#include "fileio_common.h" + +/* ********************************************************************** + * Sparse write + ************************************************************************/ + +/** AIO_fwriteSparse() : +* @return : storedSkips, +* argument for next call to AIO_fwriteSparse() or AIO_fwriteSparseEnd() */ +static unsigned +AIO_fwriteSparse(FILE* file, + const void* buffer, size_t bufferSize, + const FIO_prefs_t* const prefs, + unsigned storedSkips) +{ + const size_t* const bufferT = (const size_t*)buffer; /* Buffer is supposed malloc'ed, hence aligned on size_t */ + size_t bufferSizeT = bufferSize / sizeof(size_t); + const size_t* const bufferTEnd = bufferT + bufferSizeT; + const size_t* ptrT = bufferT; + static const size_t segmentSizeT = (32 KB) / sizeof(size_t); /* check every 32 KB */ + + if (prefs->testMode) return 0; /* do not output anything in test mode */ + + if (!prefs->sparseFileSupport) { /* normal write */ + size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file); + if (sizeCheck != bufferSize) + EXM_THROW(70, "Write error : cannot write block : %s", + strerror(errno)); + return 0; + } + + /* avoid int overflow */ + if (storedSkips > 1 GB) { + if (LONG_SEEK(file, 1 GB, SEEK_CUR) != 0) + EXM_THROW(91, "1 GB skip error (sparse file support)"); + storedSkips -= 1 GB; + } + + while (ptrT < bufferTEnd) { + size_t nb0T; + + /* adjust last segment if < 32 KB */ + size_t seg0SizeT = segmentSizeT; + if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT; + bufferSizeT -= seg0SizeT; + + /* count leading zeroes */ + for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ; + storedSkips += (unsigned)(nb0T * sizeof(size_t)); + + if (nb0T != seg0SizeT) { /* not all 0s */ + size_t const nbNon0ST = seg0SizeT - nb0T; + /* skip leading zeros */ + if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0) + EXM_THROW(92, "Sparse skip error ; try --no-sparse"); + storedSkips = 0; + /* write the rest */ + if (fwrite(ptrT + nb0T, sizeof(size_t), nbNon0ST, file) != nbNon0ST) + EXM_THROW(93, "Write error : cannot write block : %s", + strerror(errno)); + } + ptrT += seg0SizeT; + } + + { static size_t const maskT = sizeof(size_t)-1; + if (bufferSize & maskT) { + /* size not multiple of sizeof(size_t) : implies end of block */ + const char* const restStart = (const char*)bufferTEnd; + const char* restPtr = restStart; + const char* const restEnd = (const char*)buffer + bufferSize; + assert(restEnd > restStart && restEnd < restStart + sizeof(size_t)); + for ( ; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ; + storedSkips += (unsigned) (restPtr - restStart); + if (restPtr != restEnd) { + /* not all remaining bytes are 0 */ + size_t const restSize = (size_t)(restEnd - restPtr); + if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0) + EXM_THROW(92, "Sparse skip error ; try --no-sparse"); + if (fwrite(restPtr, 1, restSize, file) != restSize) + EXM_THROW(95, "Write error : cannot write end of decoded block : %s", + strerror(errno)); + storedSkips = 0; + } } } + + return storedSkips; +} + +static void +AIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips) +{ + if (prefs->testMode) assert(storedSkips == 0); + if (storedSkips>0) { + assert(prefs->sparseFileSupport > 0); /* storedSkips>0 implies sparse support is enabled */ + (void)prefs; /* assert can be disabled, in which case prefs becomes unused */ + if (LONG_SEEK(file, storedSkips-1, SEEK_CUR) != 0) + EXM_THROW(69, "Final skip error (sparse file support)"); + /* last zero must be explicitly written, + * so that skipped ones get implicitly translated as zero by FS */ + { const char lastZeroByte[1] = { 0 }; + if (fwrite(lastZeroByte, 1, 1, file) != 1) + EXM_THROW(69, "Write error : cannot write last zero : %s", strerror(errno)); + } } +} + + +/* ********************************************************************** + * AsyncIO functionality + ************************************************************************/ + +/* AIO_supported: + * Returns 1 if AsyncIO is supported on the system, 0 otherwise. */ +int AIO_supported(void) { +#ifdef ZSTD_MULTITHREAD + return 1; +#else + return 0; +#endif +} + +/* *********************************** + * Generic IoPool implementation + *************************************/ + +static IOJob_t *AIO_IOPool_createIoJob(IOPoolCtx_t *ctx, size_t bufferSize) { + IOJob_t* const job = (IOJob_t*) malloc(sizeof(IOJob_t)); + void* const buffer = malloc(bufferSize); + if(!job || !buffer) + EXM_THROW(101, "Allocation error : not enough memory"); + job->buffer = buffer; + job->bufferSize = bufferSize; + job->usedBufferSize = 0; + job->file = NULL; + job->ctx = ctx; + job->offset = 0; + return job; +} + + +/* AIO_IOPool_createThreadPool: + * Creates a thread pool and a mutex for threaded IO pool. + * Displays warning if asyncio is requested but MT isn't available. */ +static void AIO_IOPool_createThreadPool(IOPoolCtx_t* ctx, const FIO_prefs_t* prefs) { + ctx->threadPool = NULL; + ctx->threadPoolActive = 0; + if(prefs->asyncIO) { + if (ZSTD_pthread_mutex_init(&ctx->ioJobsMutex, NULL)) + EXM_THROW(102,"Failed creating ioJobsMutex mutex"); + /* We want MAX_IO_JOBS-2 queue items because we need to always have 1 free buffer to + * decompress into and 1 buffer that's actively written to disk and owned by the writing thread. */ + assert(MAX_IO_JOBS >= 2); + ctx->threadPool = POOL_create(1, MAX_IO_JOBS - 2); + ctx->threadPoolActive = 1; + if (!ctx->threadPool) + EXM_THROW(104, "Failed creating I/O thread pool"); + } +} + +/* AIO_IOPool_init: + * Allocates and sets and a new I/O thread pool including its included availableJobs. */ +static void AIO_IOPool_init(IOPoolCtx_t* ctx, const FIO_prefs_t* prefs, POOL_function poolFunction, size_t bufferSize) { + int i; + AIO_IOPool_createThreadPool(ctx, prefs); + ctx->prefs = prefs; + ctx->poolFunction = poolFunction; + ctx->totalIoJobs = ctx->threadPool ? MAX_IO_JOBS : 2; + ctx->availableJobsCount = ctx->totalIoJobs; + for(i=0; i < ctx->availableJobsCount; i++) { + ctx->availableJobs[i] = AIO_IOPool_createIoJob(ctx, bufferSize); + } + ctx->jobBufferSize = bufferSize; + ctx->file = NULL; +} + + +/* AIO_IOPool_threadPoolActive: + * Check if current operation uses thread pool. + * Note that in some cases we have a thread pool initialized but choose not to use it. */ +static int AIO_IOPool_threadPoolActive(IOPoolCtx_t* ctx) { + return ctx->threadPool && ctx->threadPoolActive; +} + + +/* AIO_IOPool_lockJobsMutex: + * Locks the IO jobs mutex if threading is active */ +static void AIO_IOPool_lockJobsMutex(IOPoolCtx_t* ctx) { + if(AIO_IOPool_threadPoolActive(ctx)) + ZSTD_pthread_mutex_lock(&ctx->ioJobsMutex); +} + +/* AIO_IOPool_unlockJobsMutex: + * Unlocks the IO jobs mutex if threading is active */ +static void AIO_IOPool_unlockJobsMutex(IOPoolCtx_t* ctx) { + if(AIO_IOPool_threadPoolActive(ctx)) + ZSTD_pthread_mutex_unlock(&ctx->ioJobsMutex); +} + +/* AIO_IOPool_releaseIoJob: + * Releases an acquired job back to the pool. Doesn't execute the job. */ +static void AIO_IOPool_releaseIoJob(IOJob_t* job) { + IOPoolCtx_t* const ctx = (IOPoolCtx_t *) job->ctx; + AIO_IOPool_lockJobsMutex(ctx); + assert(ctx->availableJobsCount < ctx->totalIoJobs); + ctx->availableJobs[ctx->availableJobsCount++] = job; + AIO_IOPool_unlockJobsMutex(ctx); +} + +/* AIO_IOPool_join: + * Waits for all tasks in the pool to finish executing. */ +static void AIO_IOPool_join(IOPoolCtx_t* ctx) { + if(AIO_IOPool_threadPoolActive(ctx)) + POOL_joinJobs(ctx->threadPool); +} + +/* AIO_IOPool_setThreaded: + * Allows (de)activating threaded mode, to be used when the expected overhead + * of threading costs more than the expected gains. */ +static void AIO_IOPool_setThreaded(IOPoolCtx_t* ctx, int threaded) { + assert(threaded == 0 || threaded == 1); + assert(ctx != NULL); + if(ctx->threadPoolActive != threaded) { + AIO_IOPool_join(ctx); + ctx->threadPoolActive = threaded; + } +} + +/* AIO_IOPool_free: + * Release a previously allocated IO thread pool. Makes sure all tasks are done and released. */ +static void AIO_IOPool_destroy(IOPoolCtx_t* ctx) { + int i; + if(ctx->threadPool) { + /* Make sure we finish all tasks and then free the resources */ + AIO_IOPool_join(ctx); + /* Make sure we are not leaking availableJobs */ + assert(ctx->availableJobsCount == ctx->totalIoJobs); + POOL_free(ctx->threadPool); + ZSTD_pthread_mutex_destroy(&ctx->ioJobsMutex); + } + assert(ctx->file == NULL); + for(i=0; i<ctx->availableJobsCount; i++) { + IOJob_t* job = (IOJob_t*) ctx->availableJobs[i]; + free(job->buffer); + free(job); + } +} + +/* AIO_IOPool_acquireJob: + * Returns an available io job to be used for a future io. */ +static IOJob_t* AIO_IOPool_acquireJob(IOPoolCtx_t* ctx) { + IOJob_t *job; + assert(ctx->file != NULL || ctx->prefs->testMode); + AIO_IOPool_lockJobsMutex(ctx); + assert(ctx->availableJobsCount > 0); + job = (IOJob_t*) ctx->availableJobs[--ctx->availableJobsCount]; + AIO_IOPool_unlockJobsMutex(ctx); + job->usedBufferSize = 0; + job->file = ctx->file; + job->offset = 0; + return job; +} + + +/* AIO_IOPool_setFile: + * Sets the destination file for future files in the pool. + * Requires completion of all queued jobs and release of all otherwise acquired jobs. */ +static void AIO_IOPool_setFile(IOPoolCtx_t* ctx, FILE* file) { + assert(ctx!=NULL); + AIO_IOPool_join(ctx); + assert(ctx->availableJobsCount == ctx->totalIoJobs); + ctx->file = file; +} + +static FILE* AIO_IOPool_getFile(const IOPoolCtx_t* ctx) { + return ctx->file; +} + +/* AIO_IOPool_enqueueJob: + * Enqueues an io job for execution. + * The queued job shouldn't be used directly after queueing it. */ +static void AIO_IOPool_enqueueJob(IOJob_t* job) { + IOPoolCtx_t* const ctx = (IOPoolCtx_t *)job->ctx; + if(AIO_IOPool_threadPoolActive(ctx)) + POOL_add(ctx->threadPool, ctx->poolFunction, job); + else + ctx->poolFunction(job); +} + +/* *********************************** + * WritePool implementation + *************************************/ + +/* AIO_WritePool_acquireJob: + * Returns an available write job to be used for a future write. */ +IOJob_t* AIO_WritePool_acquireJob(WritePoolCtx_t* ctx) { + return AIO_IOPool_acquireJob(&ctx->base); +} + +/* AIO_WritePool_enqueueAndReacquireWriteJob: + * Queues a write job for execution and acquires a new one. + * After execution `job`'s pointed value would change to the newly acquired job. + * Make sure to set `usedBufferSize` to the wanted length before call. + * The queued job shouldn't be used directly after queueing it. */ +void AIO_WritePool_enqueueAndReacquireWriteJob(IOJob_t **job) { + AIO_IOPool_enqueueJob(*job); + *job = AIO_IOPool_acquireJob((IOPoolCtx_t *)(*job)->ctx); +} + +/* AIO_WritePool_sparseWriteEnd: + * Ends sparse writes to the current file. + * Blocks on completion of all current write jobs before executing. */ +void AIO_WritePool_sparseWriteEnd(WritePoolCtx_t* ctx) { + assert(ctx != NULL); + AIO_IOPool_join(&ctx->base); + AIO_fwriteSparseEnd(ctx->base.prefs, ctx->base.file, ctx->storedSkips); + ctx->storedSkips = 0; +} + +/* AIO_WritePool_setFile: + * Sets the destination file for future writes in the pool. + * Requires completion of all queues write jobs and release of all otherwise acquired jobs. + * Also requires ending of sparse write if a previous file was used in sparse mode. */ +void AIO_WritePool_setFile(WritePoolCtx_t* ctx, FILE* file) { + AIO_IOPool_setFile(&ctx->base, file); + assert(ctx->storedSkips == 0); +} + +/* AIO_WritePool_getFile: + * Returns the file the writePool is currently set to write to. */ +FILE* AIO_WritePool_getFile(const WritePoolCtx_t* ctx) { + return AIO_IOPool_getFile(&ctx->base); +} + +/* AIO_WritePool_releaseIoJob: + * Releases an acquired job back to the pool. Doesn't execute the job. */ +void AIO_WritePool_releaseIoJob(IOJob_t* job) { + AIO_IOPool_releaseIoJob(job); +} + +/* AIO_WritePool_closeFile: + * Ends sparse write and closes the writePool's current file and sets the file to NULL. + * Requires completion of all queues write jobs and release of all otherwise acquired jobs. */ +int AIO_WritePool_closeFile(WritePoolCtx_t* ctx) { + FILE* const dstFile = ctx->base.file; + assert(dstFile!=NULL || ctx->base.prefs->testMode!=0); + AIO_WritePool_sparseWriteEnd(ctx); + AIO_IOPool_setFile(&ctx->base, NULL); + return fclose(dstFile); +} + +/* AIO_WritePool_executeWriteJob: + * Executes a write job synchronously. Can be used as a function for a thread pool. */ +static void AIO_WritePool_executeWriteJob(void* opaque){ + IOJob_t* const job = (IOJob_t*) opaque; + WritePoolCtx_t* const ctx = (WritePoolCtx_t*) job->ctx; + ctx->storedSkips = AIO_fwriteSparse(job->file, job->buffer, job->usedBufferSize, ctx->base.prefs, ctx->storedSkips); + AIO_IOPool_releaseIoJob(job); +} + +/* AIO_WritePool_create: + * Allocates and sets and a new write pool including its included jobs. */ +WritePoolCtx_t* AIO_WritePool_create(const FIO_prefs_t* prefs, size_t bufferSize) { + WritePoolCtx_t* const ctx = (WritePoolCtx_t*) malloc(sizeof(WritePoolCtx_t)); + if(!ctx) EXM_THROW(100, "Allocation error : not enough memory"); + AIO_IOPool_init(&ctx->base, prefs, AIO_WritePool_executeWriteJob, bufferSize); + ctx->storedSkips = 0; + return ctx; +} + +/* AIO_WritePool_free: + * Frees and releases a writePool and its resources. Closes destination file if needs to. */ +void AIO_WritePool_free(WritePoolCtx_t* ctx) { + /* Make sure we finish all tasks and then free the resources */ + if(AIO_WritePool_getFile(ctx)) + AIO_WritePool_closeFile(ctx); + AIO_IOPool_destroy(&ctx->base); + assert(ctx->storedSkips==0); + free(ctx); +} + +/* AIO_WritePool_setAsync: + * Allows (de)activating async mode, to be used when the expected overhead + * of asyncio costs more than the expected gains. */ +void AIO_WritePool_setAsync(WritePoolCtx_t* ctx, int async) { + AIO_IOPool_setThreaded(&ctx->base, async); +} + + +/* *********************************** + * ReadPool implementation + *************************************/ +static void AIO_ReadPool_releaseAllCompletedJobs(ReadPoolCtx_t* ctx) { + int i; + for(i=0; i<ctx->completedJobsCount; i++) { + IOJob_t* job = (IOJob_t*) ctx->completedJobs[i]; + AIO_IOPool_releaseIoJob(job); + } + ctx->completedJobsCount = 0; +} + +static void AIO_ReadPool_addJobToCompleted(IOJob_t* job) { + ReadPoolCtx_t* const ctx = (ReadPoolCtx_t *)job->ctx; + AIO_IOPool_lockJobsMutex(&ctx->base); + assert(ctx->completedJobsCount < MAX_IO_JOBS); + ctx->completedJobs[ctx->completedJobsCount++] = job; + if(AIO_IOPool_threadPoolActive(&ctx->base)) { + ZSTD_pthread_cond_signal(&ctx->jobCompletedCond); + } + AIO_IOPool_unlockJobsMutex(&ctx->base); +} + +/* AIO_ReadPool_findNextWaitingOffsetCompletedJob_locked: + * Looks through the completed jobs for a job matching the waitingOnOffset and returns it, + * if job wasn't found returns NULL. + * IMPORTANT: assumes ioJobsMutex is locked. */ +static IOJob_t* AIO_ReadPool_findNextWaitingOffsetCompletedJob_locked(ReadPoolCtx_t* ctx) { + IOJob_t *job = NULL; + int i; + /* This implementation goes through all completed jobs and looks for the one matching the next offset. + * While not strictly needed for a single threaded reader implementation (as in such a case we could expect + * reads to be completed in order) this implementation was chosen as it better fits other asyncio + * interfaces (such as io_uring) that do not provide promises regarding order of completion. */ + for (i=0; i<ctx->completedJobsCount; i++) { + job = (IOJob_t *) ctx->completedJobs[i]; + if (job->offset == ctx->waitingOnOffset) { + ctx->completedJobs[i] = ctx->completedJobs[--ctx->completedJobsCount]; + return job; + } + } + return NULL; +} + +/* AIO_ReadPool_numReadsInFlight: + * Returns the number of IO read jobs currently in flight. */ +static size_t AIO_ReadPool_numReadsInFlight(ReadPoolCtx_t* ctx) { + const size_t jobsHeld = (ctx->currentJobHeld==NULL ? 0 : 1); + return ctx->base.totalIoJobs - (ctx->base.availableJobsCount + ctx->completedJobsCount + jobsHeld); +} + +/* AIO_ReadPool_getNextCompletedJob: + * Returns a completed IOJob_t for the next read in line based on waitingOnOffset and advances waitingOnOffset. + * Would block. */ +static IOJob_t* AIO_ReadPool_getNextCompletedJob(ReadPoolCtx_t* ctx) { + IOJob_t *job = NULL; + AIO_IOPool_lockJobsMutex(&ctx->base); + + job = AIO_ReadPool_findNextWaitingOffsetCompletedJob_locked(ctx); + + /* As long as we didn't find the job matching the next read, and we have some reads in flight continue waiting */ + while (!job && (AIO_ReadPool_numReadsInFlight(ctx) > 0)) { + assert(ctx->base.threadPool != NULL); /* we shouldn't be here if we work in sync mode */ + ZSTD_pthread_cond_wait(&ctx->jobCompletedCond, &ctx->base.ioJobsMutex); + job = AIO_ReadPool_findNextWaitingOffsetCompletedJob_locked(ctx); + } + + if(job) { + assert(job->offset == ctx->waitingOnOffset); + ctx->waitingOnOffset += job->usedBufferSize; + } + + AIO_IOPool_unlockJobsMutex(&ctx->base); + return job; +} + + +/* AIO_ReadPool_executeReadJob: + * Executes a read job synchronously. Can be used as a function for a thread pool. */ +static void AIO_ReadPool_executeReadJob(void* opaque){ + IOJob_t* const job = (IOJob_t*) opaque; + ReadPoolCtx_t* const ctx = (ReadPoolCtx_t *)job->ctx; + if(ctx->reachedEof) { + job->usedBufferSize = 0; + AIO_ReadPool_addJobToCompleted(job); + return; + } + job->usedBufferSize = fread(job->buffer, 1, job->bufferSize, job->file); + if(job->usedBufferSize < job->bufferSize) { + if(ferror(job->file)) { + EXM_THROW(37, "Read error"); + } else if(feof(job->file)) { + ctx->reachedEof = 1; + } else { + EXM_THROW(37, "Unexpected short read"); + } + } + AIO_ReadPool_addJobToCompleted(job); +} + +static void AIO_ReadPool_enqueueRead(ReadPoolCtx_t* ctx) { + IOJob_t* const job = AIO_IOPool_acquireJob(&ctx->base); + job->offset = ctx->nextReadOffset; + ctx->nextReadOffset += job->bufferSize; + AIO_IOPool_enqueueJob(job); +} + +static void AIO_ReadPool_startReading(ReadPoolCtx_t* ctx) { + int i; + for (i = 0; i < ctx->base.availableJobsCount; i++) { + AIO_ReadPool_enqueueRead(ctx); + } +} + +/* AIO_ReadPool_setFile: + * Sets the source file for future read in the pool. Initiates reading immediately if file is not NULL. + * Waits for all current enqueued tasks to complete if a previous file was set. */ +void AIO_ReadPool_setFile(ReadPoolCtx_t* ctx, FILE* file) { + assert(ctx!=NULL); + AIO_IOPool_join(&ctx->base); + AIO_ReadPool_releaseAllCompletedJobs(ctx); + if (ctx->currentJobHeld) { + AIO_IOPool_releaseIoJob((IOJob_t *)ctx->currentJobHeld); + ctx->currentJobHeld = NULL; + } + AIO_IOPool_setFile(&ctx->base, file); + ctx->nextReadOffset = 0; + ctx->waitingOnOffset = 0; + ctx->srcBuffer = ctx->coalesceBuffer; + ctx->srcBufferLoaded = 0; + ctx->reachedEof = 0; + if(file != NULL) + AIO_ReadPool_startReading(ctx); +} + +/* AIO_ReadPool_create: + * Allocates and sets and a new readPool including its included jobs. + * bufferSize should be set to the maximal buffer we want to read at a time, will also be used + * as our basic read size. */ +ReadPoolCtx_t* AIO_ReadPool_create(const FIO_prefs_t* prefs, size_t bufferSize) { + ReadPoolCtx_t* const ctx = (ReadPoolCtx_t*) malloc(sizeof(ReadPoolCtx_t)); + if(!ctx) EXM_THROW(100, "Allocation error : not enough memory"); + AIO_IOPool_init(&ctx->base, prefs, AIO_ReadPool_executeReadJob, bufferSize); + + ctx->coalesceBuffer = (U8*) malloc(bufferSize * 2); + ctx->srcBuffer = ctx->coalesceBuffer; + ctx->srcBufferLoaded = 0; + ctx->completedJobsCount = 0; + ctx->currentJobHeld = NULL; + + if(ctx->base.threadPool) + if (ZSTD_pthread_cond_init(&ctx->jobCompletedCond, NULL)) + EXM_THROW(103,"Failed creating jobCompletedCond cond"); + + return ctx; +} + +/* AIO_ReadPool_free: + * Frees and releases a readPool and its resources. Closes source file. */ +void AIO_ReadPool_free(ReadPoolCtx_t* ctx) { + if(AIO_ReadPool_getFile(ctx)) + AIO_ReadPool_closeFile(ctx); + if(ctx->base.threadPool) + ZSTD_pthread_cond_destroy(&ctx->jobCompletedCond); + AIO_IOPool_destroy(&ctx->base); + free(ctx->coalesceBuffer); + free(ctx); +} + +/* AIO_ReadPool_consumeBytes: + * Consumes byes from srcBuffer's beginning and updates srcBufferLoaded accordingly. */ +void AIO_ReadPool_consumeBytes(ReadPoolCtx_t* ctx, size_t n) { + assert(n <= ctx->srcBufferLoaded); + ctx->srcBufferLoaded -= n; + ctx->srcBuffer += n; +} + +/* AIO_ReadPool_releaseCurrentlyHeldAndGetNext: + * Release the current held job and get the next one, returns NULL if no next job available. */ +static IOJob_t* AIO_ReadPool_releaseCurrentHeldAndGetNext(ReadPoolCtx_t* ctx) { + if (ctx->currentJobHeld) { + AIO_IOPool_releaseIoJob((IOJob_t *)ctx->currentJobHeld); + ctx->currentJobHeld = NULL; + AIO_ReadPool_enqueueRead(ctx); + } + ctx->currentJobHeld = AIO_ReadPool_getNextCompletedJob(ctx); + return (IOJob_t*) ctx->currentJobHeld; +} + +/* AIO_ReadPool_fillBuffer: + * Tries to fill the buffer with at least n or jobBufferSize bytes (whichever is smaller). + * Returns if srcBuffer has at least the expected number of bytes loaded or if we've reached the end of the file. + * Return value is the number of bytes added to the buffer. + * Note that srcBuffer might have up to 2 times jobBufferSize bytes. */ +size_t AIO_ReadPool_fillBuffer(ReadPoolCtx_t* ctx, size_t n) { + IOJob_t *job; + int useCoalesce = 0; + if(n > ctx->base.jobBufferSize) + n = ctx->base.jobBufferSize; + + /* We are good, don't read anything */ + if (ctx->srcBufferLoaded >= n) + return 0; + + /* We still have bytes loaded, but not enough to satisfy caller. We need to get the next job + * and coalesce the remaining bytes with the next job's buffer */ + if (ctx->srcBufferLoaded > 0) { + useCoalesce = 1; + memcpy(ctx->coalesceBuffer, ctx->srcBuffer, ctx->srcBufferLoaded); + ctx->srcBuffer = ctx->coalesceBuffer; + } + + /* Read the next chunk */ + job = AIO_ReadPool_releaseCurrentHeldAndGetNext(ctx); + if(!job) + return 0; + if(useCoalesce) { + assert(ctx->srcBufferLoaded + job->usedBufferSize <= 2*ctx->base.jobBufferSize); + memcpy(ctx->coalesceBuffer + ctx->srcBufferLoaded, job->buffer, job->usedBufferSize); + ctx->srcBufferLoaded += job->usedBufferSize; + } + else { + ctx->srcBuffer = (U8 *) job->buffer; + ctx->srcBufferLoaded = job->usedBufferSize; + } + return job->usedBufferSize; +} + +/* AIO_ReadPool_consumeAndRefill: + * Consumes the current buffer and refills it with bufferSize bytes. */ +size_t AIO_ReadPool_consumeAndRefill(ReadPoolCtx_t* ctx) { + AIO_ReadPool_consumeBytes(ctx, ctx->srcBufferLoaded); + return AIO_ReadPool_fillBuffer(ctx, ctx->base.jobBufferSize); +} + +/* AIO_ReadPool_getFile: + * Returns the current file set for the read pool. */ +FILE* AIO_ReadPool_getFile(const ReadPoolCtx_t* ctx) { + return AIO_IOPool_getFile(&ctx->base); +} + +/* AIO_ReadPool_closeFile: + * Closes the current set file. Waits for all current enqueued tasks to complete and resets state. */ +int AIO_ReadPool_closeFile(ReadPoolCtx_t* ctx) { + FILE* const file = AIO_ReadPool_getFile(ctx); + AIO_ReadPool_setFile(ctx, NULL); + return fclose(file); +} + +/* AIO_ReadPool_setAsync: + * Allows (de)activating async mode, to be used when the expected overhead + * of asyncio costs more than the expected gains. */ +void AIO_ReadPool_setAsync(ReadPoolCtx_t* ctx, int async) { + AIO_IOPool_setThreaded(&ctx->base, async); +} diff --git a/contrib/libs/zstd/programs/fileio_asyncio.h b/contrib/libs/zstd/programs/fileio_asyncio.h new file mode 100644 index 0000000000..feb25a3f9e --- /dev/null +++ b/contrib/libs/zstd/programs/fileio_asyncio.h @@ -0,0 +1,203 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /* + * FileIO AsyncIO exposes read/write IO pools that allow doing IO asynchronously. + * Current implementation relies on having one thread that reads and one that + * writes. + * Each IO pool supports up to `MAX_IO_JOBS` that can be enqueued for work, but + * are performed serially by the appropriate worker thread. + * Most systems exposes better primitives to perform asynchronous IO, such as + * io_uring on newer linux systems. The API is built in such a way that in the + * future we could replace the threads with better solutions when available. + */ + +#ifndef ZSTD_FILEIO_ASYNCIO_H +#define ZSTD_FILEIO_ASYNCIO_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "../lib/common/mem.h" /* U32, U64 */ +#include "fileio_types.h" +#include "platform.h" +#include "util.h" +#include "../lib/common/pool.h" +#include "../lib/common/threading.h" + +#define MAX_IO_JOBS (10) + +typedef struct { + /* These struct fields should be set only on creation and not changed afterwards */ + POOL_ctx* threadPool; + int threadPoolActive; + int totalIoJobs; + const FIO_prefs_t* prefs; + POOL_function poolFunction; + + /* Controls the file we currently write to, make changes only by using provided utility functions */ + FILE* file; + + /* The jobs and availableJobsCount fields are accessed by both the main and worker threads and should + * only be mutated after locking the mutex */ + ZSTD_pthread_mutex_t ioJobsMutex; + void* availableJobs[MAX_IO_JOBS]; + int availableJobsCount; + size_t jobBufferSize; +} IOPoolCtx_t; + +typedef struct { + IOPoolCtx_t base; + + /* State regarding the currently read file */ + int reachedEof; + U64 nextReadOffset; + U64 waitingOnOffset; + + /* We may hold an IOJob object as needed if we actively expose its buffer. */ + void *currentJobHeld; + + /* Coalesce buffer is used to join two buffers in case where we need to read more bytes than left in + * the first of them. Shouldn't be accessed from outside ot utility functions. */ + U8 *coalesceBuffer; + + /* Read buffer can be used by consumer code, take care when copying this pointer aside as it might + * change when consuming / refilling buffer. */ + U8 *srcBuffer; + size_t srcBufferLoaded; + + /* We need to know what tasks completed so we can use their buffers when their time comes. + * Should only be accessed after locking base.ioJobsMutex . */ + void* completedJobs[MAX_IO_JOBS]; + int completedJobsCount; + ZSTD_pthread_cond_t jobCompletedCond; +} ReadPoolCtx_t; + +typedef struct { + IOPoolCtx_t base; + unsigned storedSkips; +} WritePoolCtx_t; + +typedef struct { + /* These fields are automatically set and shouldn't be changed by non WritePool code. */ + void *ctx; + FILE* file; + void *buffer; + size_t bufferSize; + + /* This field should be changed before a job is queued for execution and should contain the number + * of bytes to write from the buffer. */ + size_t usedBufferSize; + U64 offset; +} IOJob_t; + +/* AIO_supported: + * Returns 1 if AsyncIO is supported on the system, 0 otherwise. */ +int AIO_supported(void); + + +/* AIO_WritePool_releaseIoJob: + * Releases an acquired job back to the pool. Doesn't execute the job. */ +void AIO_WritePool_releaseIoJob(IOJob_t *job); + +/* AIO_WritePool_acquireJob: + * Returns an available write job to be used for a future write. */ +IOJob_t* AIO_WritePool_acquireJob(WritePoolCtx_t *ctx); + +/* AIO_WritePool_enqueueAndReacquireWriteJob: + * Enqueues a write job for execution and acquires a new one. + * After execution `job`'s pointed value would change to the newly acquired job. + * Make sure to set `usedBufferSize` to the wanted length before call. + * The queued job shouldn't be used directly after queueing it. */ +void AIO_WritePool_enqueueAndReacquireWriteJob(IOJob_t **job); + +/* AIO_WritePool_sparseWriteEnd: + * Ends sparse writes to the current file. + * Blocks on completion of all current write jobs before executing. */ +void AIO_WritePool_sparseWriteEnd(WritePoolCtx_t *ctx); + +/* AIO_WritePool_setFile: + * Sets the destination file for future writes in the pool. + * Requires completion of all queues write jobs and release of all otherwise acquired jobs. + * Also requires ending of sparse write if a previous file was used in sparse mode. */ +void AIO_WritePool_setFile(WritePoolCtx_t *ctx, FILE* file); + +/* AIO_WritePool_getFile: + * Returns the file the writePool is currently set to write to. */ +FILE* AIO_WritePool_getFile(const WritePoolCtx_t* ctx); + +/* AIO_WritePool_closeFile: + * Ends sparse write and closes the writePool's current file and sets the file to NULL. + * Requires completion of all queues write jobs and release of all otherwise acquired jobs. */ +int AIO_WritePool_closeFile(WritePoolCtx_t *ctx); + +/* AIO_WritePool_create: + * Allocates and sets and a new write pool including its included jobs. + * bufferSize should be set to the maximal buffer we want to write to at a time. */ +WritePoolCtx_t* AIO_WritePool_create(const FIO_prefs_t* prefs, size_t bufferSize); + +/* AIO_WritePool_free: + * Frees and releases a writePool and its resources. Closes destination file. */ +void AIO_WritePool_free(WritePoolCtx_t* ctx); + +/* AIO_WritePool_setAsync: + * Allows (de)activating async mode, to be used when the expected overhead + * of asyncio costs more than the expected gains. */ +void AIO_WritePool_setAsync(WritePoolCtx_t* ctx, int async); + +/* AIO_ReadPool_create: + * Allocates and sets and a new readPool including its included jobs. + * bufferSize should be set to the maximal buffer we want to read at a time, will also be used + * as our basic read size. */ +ReadPoolCtx_t* AIO_ReadPool_create(const FIO_prefs_t* prefs, size_t bufferSize); + +/* AIO_ReadPool_free: + * Frees and releases a readPool and its resources. Closes source file. */ +void AIO_ReadPool_free(ReadPoolCtx_t* ctx); + +/* AIO_ReadPool_setAsync: + * Allows (de)activating async mode, to be used when the expected overhead + * of asyncio costs more than the expected gains. */ +void AIO_ReadPool_setAsync(ReadPoolCtx_t* ctx, int async); + +/* AIO_ReadPool_consumeBytes: + * Consumes byes from srcBuffer's beginning and updates srcBufferLoaded accordingly. */ +void AIO_ReadPool_consumeBytes(ReadPoolCtx_t *ctx, size_t n); + +/* AIO_ReadPool_fillBuffer: + * Makes sure buffer has at least n bytes loaded (as long as n is not bigger than the initialized bufferSize). + * Returns if srcBuffer has at least n bytes loaded or if we've reached the end of the file. + * Return value is the number of bytes added to the buffer. + * Note that srcBuffer might have up to 2 times bufferSize bytes. */ +size_t AIO_ReadPool_fillBuffer(ReadPoolCtx_t *ctx, size_t n); + +/* AIO_ReadPool_consumeAndRefill: + * Consumes the current buffer and refills it with bufferSize bytes. */ +size_t AIO_ReadPool_consumeAndRefill(ReadPoolCtx_t *ctx); + +/* AIO_ReadPool_setFile: + * Sets the source file for future read in the pool. Initiates reading immediately if file is not NULL. + * Waits for all current enqueued tasks to complete if a previous file was set. */ +void AIO_ReadPool_setFile(ReadPoolCtx_t *ctx, FILE* file); + +/* AIO_ReadPool_getFile: + * Returns the current file set for the read pool. */ +FILE* AIO_ReadPool_getFile(const ReadPoolCtx_t *ctx); + +/* AIO_ReadPool_closeFile: + * Closes the current set file. Waits for all current enqueued tasks to complete and resets state. */ +int AIO_ReadPool_closeFile(ReadPoolCtx_t *ctx); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FILEIO_ASYNCIO_H */ diff --git a/contrib/libs/zstd/programs/fileio_common.h b/contrib/libs/zstd/programs/fileio_common.h new file mode 100644 index 0000000000..55491b8e32 --- /dev/null +++ b/contrib/libs/zstd/programs/fileio_common.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_FILEIO_COMMON_H +#define ZSTD_FILEIO_COMMON_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "../lib/common/mem.h" /* U32, U64 */ +#include "fileio_types.h" +#include "platform.h" +#include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */ + +/*-************************************* +* Macros +***************************************/ +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) +#undef MAX +#define MAX(a,b) ((a)>(b) ? (a) : (b)) + +extern FIO_display_prefs_t g_display_prefs; + +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYOUT(...) fprintf(stdout, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) { if (g_display_prefs.displayLevel>=l) { DISPLAY(__VA_ARGS__); } } + +extern UTIL_time_t g_displayClock; + +#define REFRESH_RATE ((U64)(SEC_TO_MICRO / 6)) +#define READY_FOR_UPDATE() (UTIL_clockSpanMicro(g_displayClock) > REFRESH_RATE || g_display_prefs.displayLevel >= 4) +#define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); } +#define DISPLAYUPDATE(l, ...) { \ + if (g_display_prefs.displayLevel>=l && (g_display_prefs.progressSetting != FIO_ps_never)) { \ + if (READY_FOR_UPDATE()) { \ + DELAY_NEXT_UPDATE(); \ + DISPLAY(__VA_ARGS__); \ + if (g_display_prefs.displayLevel>=4) fflush(stderr); \ + } } } + +#define SHOULD_DISPLAY_SUMMARY() \ + (g_display_prefs.displayLevel >= 2 || g_display_prefs.progressSetting == FIO_ps_always) +#define SHOULD_DISPLAY_PROGRESS() \ + (g_display_prefs.progressSetting != FIO_ps_never && SHOULD_DISPLAY_SUMMARY()) +#define DISPLAY_PROGRESS(...) { if (SHOULD_DISPLAY_PROGRESS()) { DISPLAYLEVEL(1, __VA_ARGS__); }} +#define DISPLAYUPDATE_PROGRESS(...) { if (SHOULD_DISPLAY_PROGRESS()) { DISPLAYUPDATE(1, __VA_ARGS__); }} +#define DISPLAY_SUMMARY(...) { if (SHOULD_DISPLAY_SUMMARY()) { DISPLAYLEVEL(1, __VA_ARGS__); } } + +#undef MIN /* in case it would be already defined */ +#define MIN(a,b) ((a) < (b) ? (a) : (b)) + + +#define EXM_THROW(error, ...) \ +{ \ + DISPLAYLEVEL(1, "zstd: "); \ + DISPLAYLEVEL(5, "Error defined at %s, line %i : \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "error %i : ", error); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, " \n"); \ + exit(error); \ +} + +#define CHECK_V(v, f) \ + v = f; \ + if (ZSTD_isError(v)) { \ + DISPLAYLEVEL(5, "%s \n", #f); \ + EXM_THROW(11, "%s", ZSTD_getErrorName(v)); \ + } +#define CHECK(f) { size_t err; CHECK_V(err, f); } + + +/* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW */ +#if defined(_MSC_VER) && _MSC_VER >= 1400 +# define LONG_SEEK _fseeki64 +# define LONG_TELL _ftelli64 +#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ +# define LONG_SEEK fseeko +# define LONG_TELL ftello +#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__) +# define LONG_SEEK fseeko64 +# define LONG_TELL ftello64 +#elif defined(_WIN32) && !defined(__DJGPP__) +# include <windows.h> + static int LONG_SEEK(FILE* file, __int64 offset, int origin) { + LARGE_INTEGER off; + DWORD method; + off.QuadPart = offset; + if (origin == SEEK_END) + method = FILE_END; + else if (origin == SEEK_CUR) + method = FILE_CURRENT; + else + method = FILE_BEGIN; + + if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method)) + return 0; + else + return -1; + } + static __int64 LONG_TELL(FILE* file) { + LARGE_INTEGER off, newOff; + off.QuadPart = 0; + newOff.QuadPart = 0; + SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, &newOff, FILE_CURRENT); + return newOff.QuadPart; + } +#else +# define LONG_SEEK fseek +# define LONG_TELL ftell +#endif + +#if defined (__cplusplus) +} +#endif +#endif /* ZSTD_FILEIO_COMMON_H */ diff --git a/contrib/libs/zstd/programs/fileio_types.h b/contrib/libs/zstd/programs/fileio_types.h new file mode 100644 index 0000000000..c1f42f1ad0 --- /dev/null +++ b/contrib/libs/zstd/programs/fileio_types.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef FILEIO_TYPES_HEADER +#define FILEIO_TYPES_HEADER + +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */ +#include "../lib/zstd.h" /* ZSTD_* */ + +/*-************************************* +* Parameters: FIO_prefs_t +***************************************/ + +typedef struct FIO_display_prefs_s FIO_display_prefs_t; + +typedef enum { FIO_ps_auto, FIO_ps_never, FIO_ps_always } FIO_progressSetting_e; + +struct FIO_display_prefs_s { + int displayLevel; /* 0 : no display; 1: errors; 2: + result + interaction + warnings; 3: + progression; 4: + information */ + FIO_progressSetting_e progressSetting; +}; + + +typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_lzmaCompression, FIO_lz4Compression } FIO_compressionType_t; + +typedef struct FIO_prefs_s { + + /* Algorithm preferences */ + FIO_compressionType_t compressionType; + int sparseFileSupport; /* 0: no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */ + int dictIDFlag; + int checksumFlag; + int blockSize; + int overlapLog; + int adaptiveMode; + int useRowMatchFinder; + int rsyncable; + int minAdaptLevel; + int maxAdaptLevel; + int ldmFlag; + int ldmHashLog; + int ldmMinMatch; + int ldmBucketSizeLog; + int ldmHashRateLog; + size_t streamSrcSize; + size_t targetCBlockSize; + int srcSizeHint; + int testMode; + ZSTD_paramSwitch_e literalCompressionMode; + + /* IO preferences */ + int removeSrcFile; + int overwrite; + int asyncIO; + + /* Computation resources preferences */ + unsigned memLimit; + int nbWorkers; + + int excludeCompressedFiles; + int patchFromMode; + int contentSize; + int allowBlockDevices; + int passThrough; +} FIO_prefs_t; + +#endif /* FILEIO_TYPES_HEADER */ diff --git a/contrib/libs/zstd/programs/platform.h b/contrib/libs/zstd/programs/platform.h index b858e3b484..18a3587bfe 100644 --- a/contrib/libs/zstd/programs/platform.h +++ b/contrib/libs/zstd/programs/platform.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -33,7 +33,7 @@ extern "C" { /* ************************************** * Detect 64-bit OS -* http://nadeausoftware.com/articles/2012/02/c_c_tip_how_detect_processor_type_using_compiler_predefined_macros +* https://nadeausoftware.com/articles/2012/02/c_c_tip_how_detect_processor_type_using_compiler_predefined_macros ****************************************/ #if defined __ia64 || defined _M_IA64 /* Intel Itanium */ \ || defined __powerpc64__ || defined __ppc64__ || defined __PPC64__ /* POWER 64-bit */ \ @@ -80,7 +80,7 @@ extern "C" { * note: it's better to use unistd.h's _POSIX_VERSION whenever possible */ # define PLATFORM_POSIX_VERSION 200112L -/* try to determine posix version through official unistd.h's _POSIX_VERSION (http://pubs.opengroup.org/onlinepubs/7908799/xsh/unistd.h.html). +/* try to determine posix version through official unistd.h's _POSIX_VERSION (https://pubs.opengroup.org/onlinepubs/7908799/xsh/unistd.h.html). * note : there is no simple way to know in advance if <unistd.h> is present or not on target system, * Posix specification mandates its presence and its content, but target system must respect this spec. * It's necessary to _not_ #include <unistd.h> whenever target OS is not unix-like @@ -127,6 +127,10 @@ extern "C" { /*-********************************************* * Detect if isatty() and fileno() are available +* +* Note: Use UTIL_isConsole() for the zstd CLI +* instead, as it allows faking is console for +* testing. ************************************************/ #if (defined(__linux__) && (PLATFORM_POSIX_VERSION > 1)) \ || (PLATFORM_POSIX_VERSION >= 200112L) \ @@ -192,13 +196,13 @@ static __inline int IS_CONSOLE(FILE* stdStream) { #ifndef ZSTD_SETPRIORITY_SUPPORT - /* mandates presence of <sys/resource.h> and support for setpriority() : http://man7.org/linux/man-pages/man2/setpriority.2.html */ + /* mandates presence of <sys/resource.h> and support for setpriority() : https://man7.org/linux/man-pages/man2/setpriority.2.html */ # define ZSTD_SETPRIORITY_SUPPORT (PLATFORM_POSIX_VERSION >= 200112L) #endif #ifndef ZSTD_NANOSLEEP_SUPPORT - /* mandates support of nanosleep() within <time.h> : http://man7.org/linux/man-pages/man2/nanosleep.2.html */ + /* mandates support of nanosleep() within <time.h> : https://man7.org/linux/man-pages/man2/nanosleep.2.html */ # if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 199309L)) \ || (PLATFORM_POSIX_VERSION >= 200112L) # define ZSTD_NANOSLEEP_SUPPORT 1 diff --git a/contrib/libs/zstd/programs/timefn.c b/contrib/libs/zstd/programs/timefn.c index 64577b0e93..f941e57e61 100644 --- a/contrib/libs/zstd/programs/timefn.c +++ b/contrib/libs/zstd/programs/timefn.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -12,7 +12,8 @@ /* === Dependencies === */ #include "timefn.h" - +#include "platform.h" /* set _POSIX_C_SOURCE */ +#include <time.h> /* CLOCK_MONOTONIC, TIME_UTC */ /*-**************************************** * Time functions @@ -20,12 +21,11 @@ #if defined(_WIN32) /* Windows */ +#include <windows.h> /* LARGE_INTEGER */ #include <stdlib.h> /* abort */ #include <stdio.h> /* perror */ -UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; } - -PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) +UTIL_time_t UTIL_getTime(void) { static LARGE_INTEGER ticksPerSecond; static int init = 0; @@ -36,30 +36,20 @@ PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) } init = 1; } - return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; -} - -PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) -{ - static LARGE_INTEGER ticksPerSecond; - static int init = 0; - if (!init) { - if (!QueryPerformanceFrequency(&ticksPerSecond)) { - perror("timefn::QueryPerformanceFrequency"); - abort(); - } - init = 1; + { UTIL_time_t r; + LARGE_INTEGER x; + QueryPerformanceCounter(&x); + r.t = (PTime)(x.QuadPart * 1000000000ULL / ticksPerSecond.QuadPart); + return r; } - return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; } - #elif defined(__APPLE__) && defined(__MACH__) -UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); } +#include <mach/mach_time.h> /* mach_timebase_info_data_t, mach_timebase_info, mach_absolute_time */ -PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) +UTIL_time_t UTIL_getTime(void) { static mach_timebase_info_data_t rate; static int init = 0; @@ -67,23 +57,39 @@ PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) mach_timebase_info(&rate); init = 1; } - return (((clockEnd - clockStart) * (PTime)rate.numer) / ((PTime)rate.denom))/1000ULL; + { UTIL_time_t r; + r.t = mach_absolute_time() * (PTime)rate.numer / (PTime)rate.denom; + return r; + } } -PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) +/* POSIX.1-2001 (optional) */ +#elif defined(CLOCK_MONOTONIC) + +#include <stdlib.h> /* abort */ +#include <stdio.h> /* perror */ + +UTIL_time_t UTIL_getTime(void) { - static mach_timebase_info_data_t rate; - static int init = 0; - if (!init) { - mach_timebase_info(&rate); - init = 1; + /* time must be initialized, othersize it may fail msan test. + * No good reason, likely a limitation of timespec_get() for some target */ + struct timespec time = { 0, 0 }; + if (clock_gettime(CLOCK_MONOTONIC, &time) != 0) { + perror("timefn::clock_gettime(CLOCK_MONOTONIC)"); + abort(); + } + { UTIL_time_t r; + r.t = (PTime)time.tv_sec * 1000000000ULL + (PTime)time.tv_nsec; + return r; } - return ((clockEnd - clockStart) * (PTime)rate.numer) / ((PTime)rate.denom); } -/* C11 requires timespec_get, but FreeBSD 11 lacks it, while still claiming C11 compliance. - Android also lacks it but does define TIME_UTC. */ +/* C11 requires support of timespec_get(). + * However, FreeBSD 11 claims C11 compliance while lacking timespec_get(). + * Double confirm timespec_get() support by checking the definition of TIME_UTC. + * However, some versions of Android manage to simultanously define TIME_UTC + * and lack timespec_get() support... */ #elif (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */) \ && defined(TIME_UTC) && !defined(__ANDROID__) @@ -94,65 +100,49 @@ UTIL_time_t UTIL_getTime(void) { /* time must be initialized, othersize it may fail msan test. * No good reason, likely a limitation of timespec_get() for some target */ - UTIL_time_t time = UTIL_TIME_INITIALIZER; + struct timespec time = { 0, 0 }; if (timespec_get(&time, TIME_UTC) != TIME_UTC) { - perror("timefn::timespec_get"); + perror("timefn::timespec_get(TIME_UTC)"); abort(); } - return time; -} - -static UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end) -{ - UTIL_time_t diff; - if (end.tv_nsec < begin.tv_nsec) { - diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec; - diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec; - } else { - diff.tv_sec = end.tv_sec - begin.tv_sec; - diff.tv_nsec = end.tv_nsec - begin.tv_nsec; + { UTIL_time_t r; + r.t = (PTime)time.tv_sec * 1000000000ULL + (PTime)time.tv_nsec; + return r; } - return diff; } -PTime UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end) -{ - UTIL_time_t const diff = UTIL_getSpanTime(begin, end); - PTime micro = 0; - micro += 1000000ULL * diff.tv_sec; - micro += diff.tv_nsec / 1000ULL; - return micro; -} -PTime UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end) +#else /* relies on standard C90 (note : clock_t produces wrong measurements for multi-threaded workloads) */ + +UTIL_time_t UTIL_getTime(void) { - UTIL_time_t const diff = UTIL_getSpanTime(begin, end); - PTime nano = 0; - nano += 1000000000ULL * diff.tv_sec; - nano += diff.tv_nsec; - return nano; + UTIL_time_t r; + r.t = (PTime)clock() * 1000000000ULL / CLOCKS_PER_SEC; + return r; } - - -#else /* relies on standard C90 (note : clock_t measurements can be wrong when using multi-threading) */ - -UTIL_time_t UTIL_getTime(void) { return clock(); } -PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } -PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } +#define TIME_MT_MEASUREMENTS_NOT_SUPPORTED #endif +/* ==== Common functions, valid for all time API ==== */ +PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) +{ + return clockEnd.t - clockStart.t; +} + +PTime UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end) +{ + return UTIL_getSpanTimeNano(begin, end) / 1000ULL; +} -/* returns time span in microseconds */ PTime UTIL_clockSpanMicro(UTIL_time_t clockStart ) { UTIL_time_t const clockEnd = UTIL_getTime(); return UTIL_getSpanTimeMicro(clockStart, clockEnd); } -/* returns time span in microseconds */ PTime UTIL_clockSpanNano(UTIL_time_t clockStart ) { UTIL_time_t const clockEnd = UTIL_getTime(); @@ -167,3 +157,12 @@ void UTIL_waitForNextTick(void) clockEnd = UTIL_getTime(); } while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0); } + +int UTIL_support_MT_measurements(void) +{ +# if defined(TIME_MT_MEASUREMENTS_NOT_SUPPORTED) + return 0; +# else + return 1; +# endif +} diff --git a/contrib/libs/zstd/programs/timefn.h b/contrib/libs/zstd/programs/timefn.h index 3fcd78a28e..b814ff8d8d 100644 --- a/contrib/libs/zstd/programs/timefn.h +++ b/contrib/libs/zstd/programs/timefn.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -16,70 +16,51 @@ extern "C" { #endif -/*-**************************************** -* Dependencies -******************************************/ -#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */ - - /*-**************************************** -* Local Types +* Types ******************************************/ #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # if defined(_AIX) # include <inttypes.h> # else -# include <stdint.h> /* intptr_t */ +# include <stdint.h> /* uint64_t */ # endif typedef uint64_t PTime; /* Precise Time */ #else typedef unsigned long long PTime; /* does not support compilers without long long support */ #endif +/* UTIL_time_t contains a nanosecond time counter. + * The absolute value is not meaningful. + * It's only valid to compute the difference between 2 measurements. */ +typedef struct { PTime t; } UTIL_time_t; +#define UTIL_TIME_INITIALIZER { 0 } /*-**************************************** * Time functions ******************************************/ -#if defined(_WIN32) /* Windows */ - - #include <windows.h> /* LARGE_INTEGER */ - typedef LARGE_INTEGER UTIL_time_t; - #define UTIL_TIME_INITIALIZER { { 0, 0 } } - -#elif defined(__APPLE__) && defined(__MACH__) - - #include <mach/mach_time.h> - typedef PTime UTIL_time_t; - #define UTIL_TIME_INITIALIZER 0 - -/* C11 requires timespec_get, but FreeBSD 11 lacks it, while still claiming C11 compliance. - Android also lacks it but does define TIME_UTC. */ -#elif (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */) \ - && defined(TIME_UTC) && !defined(__ANDROID__) - - typedef struct timespec UTIL_time_t; - #define UTIL_TIME_INITIALIZER { 0, 0 } - -#else /* relies on standard C90 (note : clock_t measurements can be wrong when using multi-threading) */ - typedef clock_t UTIL_time_t; - #define UTIL_TIME_INITIALIZER 0 - -#endif +UTIL_time_t UTIL_getTime(void); +/* Timer resolution can be low on some platforms. + * To improve accuracy, it's recommended to wait for a new tick + * before starting benchmark measurements */ +void UTIL_waitForNextTick(void); +/* tells if timefn will return correct time measurements + * in presence of multi-threaded workload. + * note : this is not the case if only C90 clock_t measurements are available */ +int UTIL_support_MT_measurements(void); -UTIL_time_t UTIL_getTime(void); -PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd); PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd); +PTime UTIL_clockSpanNano(UTIL_time_t clockStart); -#define SEC_TO_MICRO ((PTime)1000000) +PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd); PTime UTIL_clockSpanMicro(UTIL_time_t clockStart); -PTime UTIL_clockSpanNano(UTIL_time_t clockStart); -void UTIL_waitForNextTick(void); +#define SEC_TO_MICRO ((PTime)1000000) /* nb of microseconds in a second */ #if defined (__cplusplus) diff --git a/contrib/libs/zstd/programs/util.c b/contrib/libs/zstd/programs/util.c index d69b72a37c..e017772ef6 100644 --- a/contrib/libs/zstd/programs/util.c +++ b/contrib/libs/zstd/programs/util.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -66,6 +66,27 @@ extern "C" { #define UTIL_DISPLAY(...) fprintf(stderr, __VA_ARGS__) #define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } } +static int g_traceDepth = 0; +int g_traceFileStat = 0; + +#define UTIL_TRACE_CALL(...) \ + { \ + if (g_traceFileStat) { \ + UTIL_DISPLAY("Trace:FileStat: %*s> ", g_traceDepth, ""); \ + UTIL_DISPLAY(__VA_ARGS__); \ + UTIL_DISPLAY("\n"); \ + ++g_traceDepth; \ + } \ + } + +#define UTIL_TRACE_RET(ret) \ + { \ + if (g_traceFileStat) { \ + --g_traceDepth; \ + UTIL_DISPLAY("Trace:FileStat: %*s< %d\n", g_traceDepth, "", (ret)); \ + } \ + } + /* A modified version of realloc(). * If UTIL_realloc() fails the original block is freed. */ @@ -100,7 +121,7 @@ int UTIL_requireUserConfirmation(const char* prompt, const char* abortMsg, ch = getchar(); result = 0; if (strchr(acceptableLetters, ch) == NULL) { - UTIL_DISPLAY("%s", abortMsg); + UTIL_DISPLAY("%s \n", abortMsg); result = 1; } /* flush the rest */ @@ -121,21 +142,34 @@ int UTIL_requireUserConfirmation(const char* prompt, const char* abortMsg, * Functions ***************************************/ +void UTIL_traceFileStat(void) +{ + g_traceFileStat = 1; +} + int UTIL_stat(const char* filename, stat_t* statbuf) { + int ret; + UTIL_TRACE_CALL("UTIL_stat(%s)", filename); #if defined(_MSC_VER) - return !_stat64(filename, statbuf); + ret = !_stat64(filename, statbuf); #elif defined(__MINGW32__) && defined (__MSVCRT__) - return !_stati64(filename, statbuf); + ret = !_stati64(filename, statbuf); #else - return !stat(filename, statbuf); + ret = !stat(filename, statbuf); #endif + UTIL_TRACE_RET(ret); + return ret; } int UTIL_isRegularFile(const char* infilename) { stat_t statbuf; - return UTIL_stat(infilename, &statbuf) && UTIL_isRegularFileStat(&statbuf); + int ret; + UTIL_TRACE_CALL("UTIL_isRegularFile(%s)", infilename); + ret = UTIL_stat(infilename, &statbuf) && UTIL_isRegularFileStat(&statbuf); + UTIL_TRACE_RET(ret); + return ret; } int UTIL_isRegularFileStat(const stat_t* statbuf) @@ -151,71 +185,114 @@ int UTIL_isRegularFileStat(const stat_t* statbuf) int UTIL_chmod(char const* filename, const stat_t* statbuf, mode_t permissions) { stat_t localStatBuf; + UTIL_TRACE_CALL("UTIL_chmod(%s, %#4o)", filename, (unsigned)permissions); if (statbuf == NULL) { - if (!UTIL_stat(filename, &localStatBuf)) return 0; + if (!UTIL_stat(filename, &localStatBuf)) { + UTIL_TRACE_RET(0); + return 0; + } statbuf = &localStatBuf; } - if (!UTIL_isRegularFileStat(statbuf)) return 0; /* pretend success, but don't change anything */ - return chmod(filename, permissions); + if (!UTIL_isRegularFileStat(statbuf)) { + UTIL_TRACE_RET(0); + return 0; /* pretend success, but don't change anything */ + } + UTIL_TRACE_CALL("chmod"); + { + int const ret = chmod(filename, permissions); + UTIL_TRACE_RET(ret); + UTIL_TRACE_RET(ret); + return ret; + } } /* set access and modification times */ int UTIL_utime(const char* filename, const stat_t *statbuf) { int ret; + UTIL_TRACE_CALL("UTIL_utime(%s)", filename); /* We check that st_mtime is a macro here in order to give us confidence * that struct stat has a struct timespec st_mtim member. We need this * check because there are some platforms that claim to be POSIX 2008 * compliant but which do not have st_mtim... */ #if (PLATFORM_POSIX_VERSION >= 200809L) && defined(st_mtime) - /* (atime, mtime) */ - struct timespec timebuf[2] = { {0, UTIME_NOW} }; - timebuf[1] = statbuf->st_mtim; - ret = utimensat(AT_FDCWD, filename, timebuf, 0); + { + /* (atime, mtime) */ + struct timespec timebuf[2] = { {0, UTIME_NOW} }; + timebuf[1] = statbuf->st_mtim; + ret = utimensat(AT_FDCWD, filename, timebuf, 0); + } #else - struct utimbuf timebuf; - timebuf.actime = time(NULL); - timebuf.modtime = statbuf->st_mtime; - ret = utime(filename, &timebuf); + { + struct utimbuf timebuf; + timebuf.actime = time(NULL); + timebuf.modtime = statbuf->st_mtime; + ret = utime(filename, &timebuf); + } #endif errno = 0; + UTIL_TRACE_RET(ret); return ret; } int UTIL_setFileStat(const char *filename, const stat_t *statbuf) { int res = 0; - stat_t curStatBuf; - if (!UTIL_stat(filename, &curStatBuf) || !UTIL_isRegularFileStat(&curStatBuf)) + UTIL_TRACE_CALL("UTIL_setFileStat(%s)", filename); + + if (!UTIL_stat(filename, &curStatBuf) || !UTIL_isRegularFileStat(&curStatBuf)) { + UTIL_TRACE_RET(-1); return -1; + } /* set access and modification times */ res += UTIL_utime(filename, statbuf); + /* Mimic gzip's behavior: + * + * "Change the group first, then the permissions, then the owner. + * That way, the permissions will be correct on systems that allow + * users to give away files, without introducing a security hole. + * Security depends on permissions not containing the setuid or + * setgid bits." */ + #if !defined(_WIN32) - res += chown(filename, statbuf->st_uid, statbuf->st_gid); /* Copy ownership */ + res += chown(filename, -1, statbuf->st_gid); /* Apply group ownership */ #endif - res += UTIL_chmod(filename, &curStatBuf, statbuf->st_mode & 07777); /* Copy file permissions */ + res += UTIL_chmod(filename, &curStatBuf, statbuf->st_mode & 0777); /* Copy file permissions */ + +#if !defined(_WIN32) + res += chown(filename, statbuf->st_uid, -1); /* Apply user ownership */ +#endif errno = 0; + UTIL_TRACE_RET(-res); return -res; /* number of errors is returned */ } int UTIL_isDirectory(const char* infilename) { stat_t statbuf; - return UTIL_stat(infilename, &statbuf) && UTIL_isDirectoryStat(&statbuf); + int ret; + UTIL_TRACE_CALL("UTIL_isDirectory(%s)", infilename); + ret = UTIL_stat(infilename, &statbuf) && UTIL_isDirectoryStat(&statbuf); + UTIL_TRACE_RET(ret); + return ret; } int UTIL_isDirectoryStat(const stat_t* statbuf) { + int ret; + UTIL_TRACE_CALL("UTIL_isDirectoryStat()"); #if defined(_MSC_VER) - return (statbuf->st_mode & _S_IFDIR) != 0; + ret = (statbuf->st_mode & _S_IFDIR) != 0; #else - return S_ISDIR(statbuf->st_mode) != 0; + ret = S_ISDIR(statbuf->st_mode) != 0; #endif + UTIL_TRACE_RET(ret); + return ret; } int UTIL_compareStr(const void *p1, const void *p2) { @@ -224,33 +301,68 @@ int UTIL_compareStr(const void *p1, const void *p2) { int UTIL_isSameFile(const char* fName1, const char* fName2) { + int ret; assert(fName1 != NULL); assert(fName2 != NULL); + UTIL_TRACE_CALL("UTIL_isSameFile(%s, %s)", fName1, fName2); #if defined(_MSC_VER) || defined(_WIN32) /* note : Visual does not support file identification by inode. * inode does not work on Windows, even with a posix layer, like msys2. * The following work-around is limited to detecting exact name repetition only, * aka `filename` is considered different from `subdir/../filename` */ - return !strcmp(fName1, fName2); + ret = !strcmp(fName1, fName2); #else { stat_t file1Stat; stat_t file2Stat; - return UTIL_stat(fName1, &file1Stat) + ret = UTIL_stat(fName1, &file1Stat) && UTIL_stat(fName2, &file2Stat) - && (file1Stat.st_dev == file2Stat.st_dev) - && (file1Stat.st_ino == file2Stat.st_ino); + && UTIL_isSameFileStat(fName1, fName2, &file1Stat, &file2Stat); } #endif + UTIL_TRACE_RET(ret); + return ret; +} + +int UTIL_isSameFileStat( + const char* fName1, const char* fName2, + const stat_t* file1Stat, const stat_t* file2Stat) +{ + int ret; + assert(fName1 != NULL); assert(fName2 != NULL); + UTIL_TRACE_CALL("UTIL_isSameFileStat(%s, %s)", fName1, fName2); +#if defined(_MSC_VER) || defined(_WIN32) + /* note : Visual does not support file identification by inode. + * inode does not work on Windows, even with a posix layer, like msys2. + * The following work-around is limited to detecting exact name repetition only, + * aka `filename` is considered different from `subdir/../filename` */ + (void)file1Stat; + (void)file2Stat; + ret = !strcmp(fName1, fName2); +#else + { + ret = (file1Stat->st_dev == file2Stat->st_dev) + && (file1Stat->st_ino == file2Stat->st_ino); + } +#endif + UTIL_TRACE_RET(ret); + return ret; } /* UTIL_isFIFO : distinguish named pipes */ int UTIL_isFIFO(const char* infilename) { + UTIL_TRACE_CALL("UTIL_isFIFO(%s)", infilename); /* macro guards, as defined in : https://linux.die.net/man/2/lstat */ #if PLATFORM_POSIX_VERSION >= 200112L - stat_t statbuf; - if (UTIL_stat(infilename, &statbuf) && UTIL_isFIFOStat(&statbuf)) return 1; + { + stat_t statbuf; + if (UTIL_stat(infilename, &statbuf) && UTIL_isFIFOStat(&statbuf)) { + UTIL_TRACE_RET(1); + return 1; + } + } #endif (void)infilename; + UTIL_TRACE_RET(0); return 0; } @@ -278,21 +390,69 @@ int UTIL_isBlockDevStat(const stat_t* statbuf) int UTIL_isLink(const char* infilename) { + UTIL_TRACE_CALL("UTIL_isLink(%s)", infilename); /* macro guards, as defined in : https://linux.die.net/man/2/lstat */ #if PLATFORM_POSIX_VERSION >= 200112L - stat_t statbuf; - int const r = lstat(infilename, &statbuf); - if (!r && S_ISLNK(statbuf.st_mode)) return 1; + { + stat_t statbuf; + int const r = lstat(infilename, &statbuf); + if (!r && S_ISLNK(statbuf.st_mode)) { + UTIL_TRACE_RET(1); + return 1; + } + } #endif (void)infilename; + UTIL_TRACE_RET(0); return 0; } +static int g_fakeStdinIsConsole = 0; +static int g_fakeStderrIsConsole = 0; +static int g_fakeStdoutIsConsole = 0; + +int UTIL_isConsole(FILE* file) +{ + int ret; + UTIL_TRACE_CALL("UTIL_isConsole(%d)", fileno(file)); + if (file == stdin && g_fakeStdinIsConsole) + ret = 1; + else if (file == stderr && g_fakeStderrIsConsole) + ret = 1; + else if (file == stdout && g_fakeStdoutIsConsole) + ret = 1; + else + ret = IS_CONSOLE(file); + UTIL_TRACE_RET(ret); + return ret; +} + +void UTIL_fakeStdinIsConsole(void) +{ + g_fakeStdinIsConsole = 1; +} +void UTIL_fakeStdoutIsConsole(void) +{ + g_fakeStdoutIsConsole = 1; +} +void UTIL_fakeStderrIsConsole(void) +{ + g_fakeStderrIsConsole = 1; +} + U64 UTIL_getFileSize(const char* infilename) { stat_t statbuf; - if (!UTIL_stat(infilename, &statbuf)) return UTIL_FILESIZE_UNKNOWN; - return UTIL_getFileSizeStat(&statbuf); + UTIL_TRACE_CALL("UTIL_getFileSize(%s)", infilename); + if (!UTIL_stat(infilename, &statbuf)) { + UTIL_TRACE_RET(-1); + return UTIL_FILESIZE_UNKNOWN; + } + { + U64 const size = UTIL_getFileSizeStat(&statbuf); + UTIL_TRACE_RET((int)size); + return size; + } } U64 UTIL_getFileSizeStat(const stat_t* statbuf) @@ -369,11 +529,16 @@ U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles) { U64 total = 0; unsigned n; + UTIL_TRACE_CALL("UTIL_getTotalFileSize(%u)", nbFiles); for (n=0; n<nbFiles; n++) { U64 const size = UTIL_getFileSize(fileNamesTable[n]); - if (size == UTIL_FILESIZE_UNKNOWN) return UTIL_FILESIZE_UNKNOWN; + if (size == UTIL_FILESIZE_UNKNOWN) { + UTIL_TRACE_RET(-1); + return UTIL_FILESIZE_UNKNOWN; + } total += size; } + UTIL_TRACE_RET((int)total); return total; } @@ -418,7 +583,7 @@ readLinesFromFile(void* dst, size_t dstCapacity, while ( !feof(inputFile) ) { size_t const lineLength = readLineFromFile(buf+pos, dstCapacity-pos, inputFile); if (lineLength == 0) break; - assert(pos + lineLength < dstCapacity); + assert(pos + lineLength <= dstCapacity); /* '=' for inputFile not terminated with '\n' */ pos += lineLength; ++nbFiles; } @@ -509,6 +674,16 @@ FileNamesTable* UTIL_allocateFileNamesTable(size_t tableSize) return fnt; } +int UTIL_searchFileNamesTable(FileNamesTable* table, char const* name) { + size_t i; + for(i=0 ;i < table->tableSize; i++) { + if(!strcmp(table->fileNames[i], name)) { + return (int)i; + } + } + return -1; +} + void UTIL_refFilename(FileNamesTable* fnt, const char* filename) { assert(fnt->tableSize < fnt->tableCapacity); @@ -559,7 +734,7 @@ UTIL_mergeFileNamesTable(FileNamesTable* table1, FileNamesTable* table2) for( idx2=0 ; (idx2 < table2->tableSize) && table2->fileNames[idx2] && (pos < newTotalTableSize) ; ++idx2, ++newTableIdx) { size_t const curLen = strlen(table2->fileNames[idx2]); memcpy(buf+pos, table2->fileNames[idx2], curLen); - assert(newTableIdx <= newTable->tableSize); + assert(newTableIdx < newTable->tableSize); newTable->fileNames[newTableIdx] = buf+pos; pos += curLen+1; } } @@ -683,8 +858,11 @@ static int UTIL_prepareFileList(const char *dirName, ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE; assert(newListSize >= 0); *bufStart = (char*)UTIL_realloc(*bufStart, (size_t)newListSize); - *bufEnd = *bufStart + newListSize; - if (*bufStart == NULL) { free(path); closedir(dir); return 0; } + if (*bufStart != NULL) { + *bufEnd = *bufStart + newListSize; + } else { + free(path); closedir(dir); return 0; + } } if (*bufStart + *pos + pathLength < *bufEnd) { memcpy(*bufStart + *pos, path, pathLength + 1); /* with final \0 */ @@ -870,30 +1048,30 @@ static const char * trimPath(const char *pathname) static char* mallocAndJoin2Dir(const char *dir1, const char *dir2) { - const size_t dir1Size = strlen(dir1); - const size_t dir2Size = strlen(dir2); - char *outDirBuffer, *buffer, trailingChar; - assert(dir1 != NULL && dir2 != NULL); - outDirBuffer = (char *) malloc(dir1Size + dir2Size + 2); - CONTROL(outDirBuffer != NULL); + { const size_t dir1Size = strlen(dir1); + const size_t dir2Size = strlen(dir2); + char *outDirBuffer, *buffer; - memcpy(outDirBuffer, dir1, dir1Size); - outDirBuffer[dir1Size] = '\0'; + outDirBuffer = (char *) malloc(dir1Size + dir2Size + 2); + CONTROL(outDirBuffer != NULL); - if (dir2[0] == '.') - return outDirBuffer; + memcpy(outDirBuffer, dir1, dir1Size); + outDirBuffer[dir1Size] = '\0'; - buffer = outDirBuffer + dir1Size; - trailingChar = *(buffer - 1); - if (trailingChar != PATH_SEP) { - *buffer = PATH_SEP; - buffer++; - } - memcpy(buffer, dir2, dir2Size); - buffer[dir2Size] = '\0'; + if (dir2[0] == '.') + return outDirBuffer; + + buffer = outDirBuffer + dir1Size; + if (dir1Size > 0 && *(buffer - 1) != PATH_SEP) { + *buffer = PATH_SEP; + buffer++; + } + memcpy(buffer, dir2, dir2Size); + buffer[dir2Size] = '\0'; - return outDirBuffer; + return outDirBuffer; + } } /* this function will return NULL if input srcFileName is not valid name for mirrored output path */ @@ -999,7 +1177,7 @@ makeUniqueMirroredDestDirs(char** srcDirNames, unsigned nbFile, const char* outD trimPath(currDirName))) uniqueDirNr++; - /* we need maintain original src dir name instead of trimmed + /* we need to maintain original src dir name instead of trimmed * dir, so we can retrieve the original src dir's mode_t */ uniqueDirNames[uniqueDirNr - 1] = currDirName; } @@ -1378,6 +1556,9 @@ int UTIL_countCores(int logical) int UTIL_countCores(int logical) { + /* suppress unused parameter warning */ + (void)logical; + /* assume 1 */ return 1; } diff --git a/contrib/libs/zstd/programs/util.h b/contrib/libs/zstd/programs/util.h index add165d57c..4ec54137dd 100644 --- a/contrib/libs/zstd/programs/util.h +++ b/contrib/libs/zstd/programs/util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -171,10 +171,30 @@ int UTIL_chmod(char const* filename, const stat_t* statbuf, mode_t permissions); int UTIL_isRegularFile(const char* infilename); int UTIL_isDirectory(const char* infilename); int UTIL_isSameFile(const char* file1, const char* file2); +int UTIL_isSameFileStat(const char* file1, const char* file2, const stat_t* file1Stat, const stat_t* file2Stat); int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]); int UTIL_isLink(const char* infilename); int UTIL_isFIFO(const char* infilename); +/** + * Returns with the given file descriptor is a console. + * Allows faking whether stdin/stdout/stderr is a console + * using UTIL_fake*IsConsole(). + */ +int UTIL_isConsole(FILE* file); + +/** + * Pretends that stdin/stdout/stderr is a console for testing. + */ +void UTIL_fakeStdinIsConsole(void); +void UTIL_fakeStdoutIsConsole(void); +void UTIL_fakeStderrIsConsole(void); + +/** + * Emit traces for functions that read, or modify file metadata. + */ +void UTIL_traceFileStat(void); + #define UTIL_FILESIZE_UNKNOWN ((U64)(-1)) U64 UTIL_getFileSize(const char* infilename); U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles); @@ -248,7 +268,6 @@ UTIL_mergeFileNamesTable(FileNamesTable* table1, FileNamesTable* table2); /*! UTIL_expandFNT() : * read names from @fnt, and expand those corresponding to directories * update @fnt, now containing only file names, - * @return : 0 in case of success, 1 if error * note : in case of error, @fnt[0] is NULL */ void UTIL_expandFNT(FileNamesTable** fnt, int followLinks); @@ -269,6 +288,11 @@ UTIL_createFNT_fromROTable(const char** filenames, size_t nbFilenames); */ FileNamesTable* UTIL_allocateFileNamesTable(size_t tableSize); +/*! UTIL_searchFileNamesTable() : + * Searched through entries in FileNamesTable for a specific name. + * @return : index of entry if found or -1 if not found + */ +int UTIL_searchFileNamesTable(FileNamesTable* table, char const* name); /*! UTIL_refFilename() : * Add a reference to read-only name into @fnt table. diff --git a/contrib/libs/zstd/programs/zstdcli.c b/contrib/libs/zstd/programs/zstdcli.c index bfe18c0c1b..93f75e21d9 100644 --- a/contrib/libs/zstd/programs/zstdcli.c +++ b/contrib/libs/zstd/programs/zstdcli.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Yann Collet, Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -27,8 +27,8 @@ /*-************************************ * Dependencies **************************************/ -#include "platform.h" /* IS_CONSOLE, PLATFORM_POSIX_VERSION */ -#include "util.h" /* UTIL_HAS_CREATEFILELIST, UTIL_createFileList */ +#include "platform.h" /* PLATFORM_POSIX_VERSION */ +#include "util.h" /* UTIL_HAS_CREATEFILELIST, UTIL_createFileList, UTIL_isConsole */ #include <stdlib.h> /* getenv */ #include <string.h> /* strcmp, strlen */ #include <stdio.h> /* fprintf(), stdin, stdout, stderr */ @@ -46,17 +46,18 @@ # include "zstdcli_trace.h" #endif #include "../lib/zstd.h" /* ZSTD_VERSION_STRING, ZSTD_minCLevel, ZSTD_maxCLevel */ +#include "fileio_asyncio.h" /*-************************************ * Constants **************************************/ -#define COMPRESSOR_NAME "zstd command line interface" +#define COMPRESSOR_NAME "Zstandard CLI" #ifndef ZSTD_VERSION # define ZSTD_VERSION "v" ZSTD_VERSION_STRING #endif #define AUTHOR "Yann Collet" -#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR +#define WELCOME_MESSAGE "*** %s (%i-bit) %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR #define ZSTD_ZSTDMT "zstdmt" #define ZSTD_UNZSTD "unzstd" @@ -124,6 +125,15 @@ static void checkLibVersion(void) } +/*! exeNameMatch() : + @return : a non-zero value if exeName matches test, excluding the extension + */ +static int exeNameMatch(const char* exeName, const char* test) +{ + return !strncmp(exeName, test, strlen(test)) && + (exeName[strlen(test)] == '\0' || exeName[strlen(test)] == '.'); +} + /*-************************************ * Command Line **************************************/ @@ -133,143 +143,174 @@ static void checkLibVersion(void) */ static void usage(FILE* f, const char* programName) { - DISPLAY_F(f, "Usage : \n"); - DISPLAY_F(f, " %s [args] [FILE(s)] [-o file] \n", programName); + DISPLAY_F(f, "Compress or decompress the INPUT file(s); reads from STDIN if INPUT is `-` or not provided.\n\n"); + DISPLAY_F(f, "Usage: %s [OPTIONS...] [INPUT... | -] [-o OUTPUT]\n\n", programName); + DISPLAY_F(f, "Options:\n"); + DISPLAY_F(f, " -o OUTPUT Write output to a single file, OUTPUT.\n"); + DISPLAY_F(f, " -k, --keep Preserve INPUT file(s). [Default] \n"); + DISPLAY_F(f, " --rm Remove INPUT file(s) after successful (de)compression.\n"); +#ifdef ZSTD_GZCOMPRESS + if (exeNameMatch(programName, ZSTD_GZ)) { /* behave like gzip */ + DISPLAY_F(f, " -n, --no-name Do not store original filename when compressing.\n\n"); + } +#endif DISPLAY_F(f, "\n"); - DISPLAY_F(f, "FILE : a filename \n"); - DISPLAY_F(f, " with no FILE, or when FILE is - , read standard input\n"); - DISPLAY_F(f, "Arguments : \n"); #ifndef ZSTD_NOCOMPRESS - DISPLAY_F(f, " -# : # compression level (1-%d, default: %d) \n", ZSTDCLI_CLEVEL_MAX, ZSTDCLI_CLEVEL_DEFAULT); + DISPLAY_F(f, " -# Desired compression level, where `#` is a number between 1 and %d;\n", ZSTDCLI_CLEVEL_MAX); + DISPLAY_F(f, " lower numbers provide faster compression, higher numbers yield\n"); + DISPLAY_F(f, " better compression ratios. [Default: %d]\n\n", ZSTDCLI_CLEVEL_DEFAULT); #endif #ifndef ZSTD_NODECOMPRESS - DISPLAY_F(f, " -d : decompression \n"); + DISPLAY_F(f, " -d, --decompress Perform decompression.\n"); #endif - DISPLAY_F(f, " -D DICT: use DICT as Dictionary for compression or decompression \n"); - DISPLAY_F(f, " -o file: result stored into `file` (only 1 output file) \n"); - DISPLAY_F(f, " -f : disable input and output checks. Allows overwriting existing files,\n"); - DISPLAY_F(f, " input from console, output to stdout, operating on links,\n"); - DISPLAY_F(f, " block devices, etc.\n"); - DISPLAY_F(f, "--rm : remove source file(s) after successful de/compression \n"); - DISPLAY_F(f, " -k : preserve source file(s) (default) \n"); - DISPLAY_F(f, " -h/-H : display help/long help and exit \n"); + DISPLAY_F(f, " -D DICT Use DICT as the dictionary for compression or decompression.\n\n"); + DISPLAY_F(f, " -f, --force Disable input and output checks. Allows overwriting existing files,\n"); + DISPLAY_F(f, " receiving input from the console, printing ouput to STDOUT, and\n"); + DISPLAY_F(f, " operating on links, block devices, etc. Unrecognized formats will be\n"); + DISPLAY_F(f, " passed-through through as-is.\n\n"); + + DISPLAY_F(f, " -h Display short usage and exit.\n"); + DISPLAY_F(f, " -H, --help Display full help and exit.\n"); + DISPLAY_F(f, " -V, --version Display the program version and exit.\n"); + DISPLAY_F(f, "\n"); } static void usage_advanced(const char* programName) { DISPLAYOUT(WELCOME_MESSAGE); + DISPLAYOUT("\n"); usage(stdout, programName); - DISPLAYOUT( "\n"); - DISPLAYOUT( "Advanced arguments : \n"); - DISPLAYOUT( " -V : display Version number and exit \n"); - - DISPLAYOUT( " -c : write to standard output (even if it is the console) \n"); + DISPLAYOUT("Advanced options:\n"); + DISPLAYOUT(" -c, --stdout Write to STDOUT (even if it is a console) and keep the INPUT file(s).\n\n"); - DISPLAYOUT( " -v : verbose mode; specify multiple times to increase verbosity \n"); - DISPLAYOUT( " -q : suppress warnings; specify twice to suppress errors too \n"); - DISPLAYOUT( "--[no-]progress : forcibly display, or never display the progress counter.\n"); - DISPLAYOUT( " note: any (de)compressed output to terminal will mix with progress counter text. \n"); + DISPLAYOUT(" -v, --verbose Enable verbose output; pass multiple times to increase verbosity.\n"); + DISPLAYOUT(" -q, --quiet Suppress warnings; pass twice to suppress errors.\n"); +#ifndef ZSTD_NOTRACE + DISPLAYOUT(" --trace LOG Log tracing information to LOG.\n"); +#endif + DISPLAYOUT("\n"); + DISPLAYOUT(" --[no-]progress Forcibly show/hide the progress counter. NOTE: Any (de)compressed\n"); + DISPLAYOUT(" output to terminal will mix with progress counter text.\n\n"); #ifdef UTIL_HAS_CREATEFILELIST - DISPLAYOUT( " -r : operate recursively on directories \n"); - DISPLAYOUT( "--filelist FILE : read list of files to operate upon from FILE \n"); - DISPLAYOUT( "--output-dir-flat DIR : processed files are stored into DIR \n"); + DISPLAYOUT(" -r Operate recursively on directories.\n"); + DISPLAYOUT(" --filelist LIST Read a list of files to operate on from LIST.\n"); + DISPLAYOUT(" --output-dir-flat DIR Store processed files in DIR.\n"); #endif #ifdef UTIL_HAS_MIRRORFILELIST - DISPLAYOUT( "--output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure \n"); + DISPLAYOUT(" --output-dir-mirror DIR Store processed files in DIR, respecting original directory structure.\n"); #endif + if (AIO_supported()) + DISPLAYOUT(" --[no-]asyncio Use asynchronous IO. [Default: Enabled]\n"); - + DISPLAYOUT("\n"); #ifndef ZSTD_NOCOMPRESS - DISPLAYOUT( "--[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled)"); + DISPLAYOUT(" --[no-]check Add XXH64 integrity checksums during compression. [Default: Add, Validate]\n"); #ifndef ZSTD_NODECOMPRESS - DISPLAYOUT( ". If specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate)."); + DISPLAYOUT(" If `-d` is present, ignore/validate checksums during decompression.\n"); #endif #else #ifdef ZSTD_NOCOMPRESS - DISPLAYOUT( "--[no-]check : during decompression, ignore/validate checksums in compressed frame (default: validate)."); + DISPLAYOUT(" --[no-]check Ignore/validate checksums during decompression. [Default: Validate]"); #endif #endif /* ZSTD_NOCOMPRESS */ -#ifndef ZSTD_NOTRACE - DISPLAYOUT( "\n"); - DISPLAYOUT( "--trace FILE : log tracing information to FILE."); -#endif - DISPLAYOUT( "\n"); - - DISPLAYOUT( "-- : All arguments after \"--\" are treated as files \n"); + DISPLAYOUT("\n"); + DISPLAYOUT(" -- Treat remaining arguments after `--` as files.\n"); #ifndef ZSTD_NOCOMPRESS - DISPLAYOUT( "\n"); - DISPLAYOUT( "Advanced compression arguments : \n"); - DISPLAYOUT( "--ultra : enable levels beyond %i, up to %i (requires more memory) \n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel()); - DISPLAYOUT( "--long[=#]: enable long distance matching with given window log (default: %u) \n", g_defaultMaxWindowLog); - DISPLAYOUT( "--fast[=#]: switch to very fast compression levels (default: %u) \n", 1); - DISPLAYOUT( "--adapt : dynamically adapt compression level to I/O conditions \n"); - DISPLAYOUT( "--[no-]row-match-finder : force enable/disable usage of fast row-based matchfinder for greedy, lazy, and lazy2 strategies \n"); - DISPLAYOUT( "--patch-from=FILE : specify the file to be used as a reference point for zstd's diff engine. \n"); + DISPLAYOUT("\n"); + DISPLAYOUT("Advanced compression options:\n"); + DISPLAYOUT(" --ultra Enable levels beyond %i, up to %i; requires more memory.\n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel()); + DISPLAYOUT(" --fast[=#] Use to very fast compression levels. [Default: %u]\n", 1); +#ifdef ZSTD_GZCOMPRESS + if (exeNameMatch(programName, ZSTD_GZ)) { /* behave like gzip */ + DISPLAYOUT(" --best Compatibility alias for `-9`.\n"); + } +#endif + DISPLAYOUT(" --adapt Dynamically adapt compression level to I/O conditions.\n"); + DISPLAYOUT(" --long[=#] Enable long distance matching with window log #. [Default: %u]\n", g_defaultMaxWindowLog); + DISPLAYOUT(" --patch-from=REF Use REF as the reference point for Zstandard's diff engine. \n\n"); # ifdef ZSTD_MULTITHREAD - DISPLAYOUT( " -T# : spawns # compression threads (default: 1, 0==# cores) \n"); - DISPLAYOUT( " -B# : select size of each job (default: 0==automatic) \n"); - DISPLAYOUT( "--single-thread : use a single thread for both I/O and compression (result slightly different than -T1) \n"); - DISPLAYOUT( "--auto-threads={physical,logical} (default: physical} : use either physical cores or logical cores as default when specifying -T0 \n"); - DISPLAYOUT( "--rsyncable : compress using a rsync-friendly method (-B sets block size) \n"); + DISPLAYOUT(" -T# Spawn # compression threads. [Default: 1; pass 0 for core count.]\n"); + DISPLAYOUT(" --single-thread Share a single thread for I/O and compression (slightly different than `-T1`).\n"); + DISPLAYOUT(" --auto-threads={physical|logical}\n"); + DISPLAYOUT(" Use physical/logical cores when using `-T0`. [Default: Physical]\n\n"); + DISPLAYOUT(" -B# Set job size to #. [Default: 0 (automatic)]\n"); + DISPLAYOUT(" --rsyncable Compress using a rsync-friendly method (`-B` sets block size). \n"); + DISPLAYOUT("\n"); # endif - DISPLAYOUT( "--exclude-compressed: only compress files that are not already compressed \n"); - DISPLAYOUT( "--stream-size=# : specify size of streaming input from `stdin` \n"); - DISPLAYOUT( "--size-hint=# optimize compression parameters for streaming input of approximately this size \n"); - DISPLAYOUT( "--target-compressed-block-size=# : generate compressed block of approximately targeted size \n"); - DISPLAYOUT( "--no-dictID : don't write dictID into header (dictionary compression only) \n"); - DISPLAYOUT( "--[no-]compress-literals : force (un)compressed literals \n"); - - DISPLAYOUT( "--format=zstd : compress files to the .zst format (default) \n"); + DISPLAYOUT(" --exclude-compressed Only compress files that are not already compressed.\n\n"); + + DISPLAYOUT(" --stream-size=# Specify size of streaming input from STDIN.\n"); + DISPLAYOUT(" --size-hint=# Optimize compression parameters for streaming input of approximately size #.\n"); + DISPLAYOUT(" --target-compressed-block-size=#\n"); + DISPLAYOUT(" Generate compressed blocks of approximately # size.\n\n"); + DISPLAYOUT(" --no-dictID Don't write `dictID` into the header (dictionary compression only).\n"); + DISPLAYOUT(" --[no-]compress-literals Force (un)compressed literals.\n"); + DISPLAYOUT(" --[no-]row-match-finder Explicitly enable/disable the fast, row-based matchfinder for\n"); + DISPLAYOUT(" the 'greedy', 'lazy', and 'lazy2' strategies.\n"); + + DISPLAYOUT("\n"); + DISPLAYOUT(" --format=zstd Compress files to the `.zst` format. [Default]\n"); #ifdef ZSTD_GZCOMPRESS - DISPLAYOUT( "--format=gzip : compress files to the .gz format \n"); + DISPLAYOUT(" --format=gzip Compress files to the `.gz` format.\n"); #endif #ifdef ZSTD_LZMACOMPRESS - DISPLAYOUT( "--format=xz : compress files to the .xz format \n"); - DISPLAYOUT( "--format=lzma : compress files to the .lzma format \n"); + DISPLAYOUT(" --format=xz Compress files to the `.xz` format.\n"); + DISPLAYOUT(" --format=lzma Compress files to the `.lzma` format.\n"); #endif #ifdef ZSTD_LZ4COMPRESS - DISPLAYOUT( "--format=lz4 : compress files to the .lz4 format \n"); + DISPLAYOUT( " --format=lz4 Compress files to the `.lz4` format.\n"); #endif #endif /* !ZSTD_NOCOMPRESS */ #ifndef ZSTD_NODECOMPRESS - DISPLAYOUT( "\n"); - DISPLAYOUT( "Advanced decompression arguments : \n"); - DISPLAYOUT( " -l : print information about zstd compressed files \n"); - DISPLAYOUT( "--test : test compressed file integrity \n"); - DISPLAYOUT( " -M# : Set a memory usage limit for decompression \n"); + DISPLAYOUT("\n"); + DISPLAYOUT("Advanced decompression options:\n"); + DISPLAYOUT(" -l Print information about Zstandard-compressed files.\n"); + DISPLAYOUT(" --test Test compressed file integrity.\n"); + DISPLAYOUT(" -M# Set the memory usage limit to # megabytes.\n"); # if ZSTD_SPARSE_DEFAULT - DISPLAYOUT( "--[no-]sparse : sparse mode (default: enabled on file, disabled on stdout) \n"); + DISPLAYOUT(" --[no-]sparse Enable sparse mode. [Default: Enabled for files, disabled for STDOUT.]\n"); # else - DISPLAYOUT( "--[no-]sparse : sparse mode (default: disabled) \n"); + DISPLAYOUT(" --[no-]sparse Enable sparse mode. [Default: Disabled]\n"); # endif + { + char const* passThroughDefault = "Disabled"; + if (exeNameMatch(programName, ZSTD_CAT) || + exeNameMatch(programName, ZSTD_ZCAT) || + exeNameMatch(programName, ZSTD_GZCAT)) { + passThroughDefault = "Enabled"; + } + DISPLAYOUT(" --[no-]pass-through Pass through uncompressed files as-is. [Default: %s]\n", passThroughDefault); + } #endif /* ZSTD_NODECOMPRESS */ #ifndef ZSTD_NODICT - DISPLAYOUT( "\n"); - DISPLAYOUT( "Dictionary builder : \n"); - DISPLAYOUT( "--train ## : create a dictionary from a training set of files \n"); - DISPLAYOUT( "--train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args \n"); - DISPLAYOUT( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]] : use the fast cover algorithm with optional args \n"); - DISPLAYOUT( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u) \n", g_defaultSelectivityLevel); - DISPLAYOUT( " -o DICT : DICT is dictionary name (default: %s) \n", g_defaultDictName); - DISPLAYOUT( "--maxdict=# : limit dictionary to specified size (default: %u) \n", g_defaultMaxDictSize); - DISPLAYOUT( "--dictID=# : force dictionary ID to specified value (default: random) \n"); + DISPLAYOUT("\n"); + DISPLAYOUT("Dictionary builder:\n"); + DISPLAYOUT(" --train Create a dictionary from a training set of files.\n\n"); + DISPLAYOUT(" --train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]]\n"); + DISPLAYOUT(" Use the cover algorithm (with optional arguments).\n"); + DISPLAYOUT(" --train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]]\n"); + DISPLAYOUT(" Use the fast cover algorithm (with optional arguments).\n\n"); + DISPLAYOUT(" --train-legacy[=s=#] Use the legacy algorithm with selectivity #. [Default: %u]\n", g_defaultSelectivityLevel); + DISPLAYOUT(" -o NAME Use NAME as dictionary name. [Default: %s]\n", g_defaultDictName); + DISPLAYOUT(" --maxdict=# Limit dictionary to specified size #. [Default: %u]\n", g_defaultMaxDictSize); + DISPLAYOUT(" --dictID=# Force dictionary ID to #. [Default: Random]\n"); #endif #ifndef ZSTD_NOBENCH - DISPLAYOUT( "\n"); - DISPLAYOUT( "Benchmark arguments : \n"); - DISPLAYOUT( " -b# : benchmark file(s), using # compression level (default: %d) \n", ZSTDCLI_CLEVEL_DEFAULT); - DISPLAYOUT( " -e# : test all compression levels successively from -b# to -e# (default: 1) \n"); - DISPLAYOUT( " -i# : minimum evaluation time in seconds (default: 3s) \n"); - DISPLAYOUT( " -B# : cut file into independent blocks of size # (default: no block) \n"); - DISPLAYOUT( " -S : output one benchmark result per input file (default: consolidated result) \n"); - DISPLAYOUT( "--priority=rt : set process priority to real-time \n"); + DISPLAYOUT("\n"); + DISPLAYOUT("Benchmark options:\n"); + DISPLAYOUT(" -b# Perform benchmarking with compression level #. [Default: %d]\n", ZSTDCLI_CLEVEL_DEFAULT); + DISPLAYOUT(" -e# Test all compression levels up to #; starting level is `-b#`. [Default: 1]\n"); + DISPLAYOUT(" -i# Set the minimum evaluation to time # seconds. [Default: 3]\n"); + DISPLAYOUT(" -B# Cut file into independent chunks of size #. [Default: No chunking]\n"); + DISPLAYOUT(" -S Output one benchmark result per input file. [Default: Consolidated result]\n"); + DISPLAYOUT(" --priority=rt Set process priority to real-time.\n"); #endif } @@ -296,18 +337,9 @@ static const char* lastNameFromPath(const char* path) return name; } -/*! exeNameMatch() : - @return : a non-zero value if exeName matches test, excluding the extension - */ -static int exeNameMatch(const char* exeName, const char* test) -{ - return !strncmp(exeName, test, strlen(test)) && - (exeName[strlen(test)] == '\0' || exeName[strlen(test)] == '.'); -} - static void errorOut(const char* msg) { - DISPLAY("%s \n", msg); exit(1); + DISPLAYLEVEL(1, "%s \n", msg); exit(1); } /*! readU32FromCharChecked() : @@ -634,6 +666,11 @@ static void printVersion(void) #endif DISPLAYOUT("\n"); if (g_displayLevel >= 4) { + /* library versions */ + DISPLAYOUT("zlib version %s\n", FIO_zlibVersion()); + DISPLAYOUT("lz4 version %s\n", FIO_lz4Version()); + DISPLAYOUT("lzma version %s\n", FIO_lzmaVersion()); + /* posix support */ #ifdef _POSIX_C_SOURCE DISPLAYOUT("_POSIX_C_SOURCE defined: %ldL\n", (long) _POSIX_C_SOURCE); @@ -749,13 +786,13 @@ static unsigned init_nbThreads(void) { } else { \ argNb++; \ if (argNb >= argCount) { \ - DISPLAY("error: missing command argument \n"); \ + DISPLAYLEVEL(1, "error: missing command argument \n"); \ CLEAN_RETURN(1); \ } \ ptr = argv[argNb]; \ assert(ptr != NULL); \ if (ptr[0]=='-') { \ - DISPLAY("error: command cannot be separated from its argument by another command \n"); \ + DISPLAYLEVEL(1, "error: command cannot be separated from its argument by another command \n"); \ CLEAN_RETURN(1); \ } } } @@ -763,6 +800,18 @@ static unsigned init_nbThreads(void) { const char* __nb; \ NEXT_FIELD(__nb); \ val32 = readU32FromChar(&__nb); \ + if(*__nb != 0) { \ + errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \ + } \ +} + +#define NEXT_TSIZE(valTsize) { \ + const char* __nb; \ + NEXT_FIELD(__nb); \ + valTsize = readSizeTFromChar(&__nb); \ + if(*__nb != 0) { \ + errorOut("error: only numeric values with optional suffixes K, KB, KiB, M, MB, MiB are allowed"); \ + } \ } typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom_list } zstd_operation_mode; @@ -788,9 +837,7 @@ int main(int argCount, const char* argv[]) hasStdout = 0, ldmFlag = 0, main_pause = 0, - nbWorkers = 0, adapt = 0, - useRowMatchFinder = 0, adaptMin = MINCLEVEL, adaptMax = MAXCLEVEL, rsyncable = 0, @@ -799,18 +846,21 @@ int main(int argCount, const char* argv[]) separateFiles = 0, setRealTimePrio = 0, singleThread = 0, -#ifdef ZSTD_MULTITHREAD defaultLogicalCores = 0, -#endif showDefaultCParams = 0, ultra=0, - contentSize=1; + contentSize=1, + removeSrcFile=0; + ZSTD_paramSwitch_e useRowMatchFinder = ZSTD_ps_auto; + FIO_compressionType_t cType = FIO_zstdCompression; + unsigned nbWorkers = 0; double compressibility = 0.5; unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */ size_t blockSize = 0; FIO_prefs_t* const prefs = FIO_createPreferences(); FIO_ctx_t* const fCtx = FIO_createContext(); + FIO_progressSetting_e progress = FIO_ps_auto; zstd_operation_mode operation = zom_compress; ZSTD_compressionParameters compressionParams; int cLevel = init_cLevel(); @@ -831,6 +881,7 @@ int main(int argCount, const char* argv[]) size_t streamSrcSize = 0; size_t targetCBlockSize = 0; size_t srcSizeHint = 0; + size_t nbInputFileNames = 0; int dictCLevel = g_defaultDictCLevel; unsigned dictSelect = g_defaultSelectivityLevel; #ifndef ZSTD_NODICT @@ -849,7 +900,7 @@ int main(int argCount, const char* argv[]) (void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */ (void)memLimit; assert(argCount >= 1); - if ((filenames==NULL) || (file_of_names==NULL)) { DISPLAY("zstd: allocation error \n"); exit(1); } + if ((filenames==NULL) || (file_of_names==NULL)) { DISPLAYLEVEL(1, "zstd: allocation error \n"); exit(1); } programName = lastNameFromPath(programName); #ifdef ZSTD_MULTITHREAD nbWorkers = init_nbThreads(); @@ -858,17 +909,20 @@ int main(int argCount, const char* argv[]) /* preset behaviors */ if (exeNameMatch(programName, ZSTD_ZSTDMT)) nbWorkers=0, singleThread=0; if (exeNameMatch(programName, ZSTD_UNZSTD)) operation=zom_decompress; - if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; } /* supports multiple formats */ - if (exeNameMatch(programName, ZSTD_ZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; } /* behave like zcat, also supports multiple formats */ - if (exeNameMatch(programName, ZSTD_GZ)) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like gzip */ - if (exeNameMatch(programName, ZSTD_GUNZIP)) { operation=zom_decompress; FIO_setRemoveSrcFile(prefs, 1); } /* behave like gunzip, also supports multiple formats */ - if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; } /* behave like gzcat, also supports multiple formats */ - if (exeNameMatch(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like lzma */ - if (exeNameMatch(programName, ZSTD_UNLZMA)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like unlzma, also supports multiple formats */ - if (exeNameMatch(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like xz */ - if (exeNameMatch(programName, ZSTD_UNXZ)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_xzCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like unxz, also supports multiple formats */ - if (exeNameMatch(programName, ZSTD_LZ4)) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); } /* behave like lz4 */ - if (exeNameMatch(programName, ZSTD_UNLZ4)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_lz4Compression); } /* behave like unlz4, also supports multiple formats */ + if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; FIO_setPassThroughFlag(prefs, 1); outFileName=stdoutmark; g_displayLevel=1; } /* supports multiple formats */ + if (exeNameMatch(programName, ZSTD_ZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; FIO_setPassThroughFlag(prefs, 1); outFileName=stdoutmark; g_displayLevel=1; } /* behave like zcat, also supports multiple formats */ + if (exeNameMatch(programName, ZSTD_GZ)) { /* behave like gzip */ + suffix = GZ_EXTENSION; cType = FIO_gzipCompression; removeSrcFile=1; + dictCLevel = cLevel = 6; /* gzip default is -6 */ + } + if (exeNameMatch(programName, ZSTD_GUNZIP)) { operation=zom_decompress; removeSrcFile=1; } /* behave like gunzip, also supports multiple formats */ + if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; FIO_setPassThroughFlag(prefs, 1); outFileName=stdoutmark; g_displayLevel=1; } /* behave like gzcat, also supports multiple formats */ + if (exeNameMatch(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; cType = FIO_lzmaCompression; removeSrcFile=1; } /* behave like lzma */ + if (exeNameMatch(programName, ZSTD_UNLZMA)) { operation=zom_decompress; cType = FIO_lzmaCompression; removeSrcFile=1; } /* behave like unlzma, also supports multiple formats */ + if (exeNameMatch(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; cType = FIO_xzCompression; removeSrcFile=1; } /* behave like xz */ + if (exeNameMatch(programName, ZSTD_UNXZ)) { operation=zom_decompress; cType = FIO_xzCompression; removeSrcFile=1; } /* behave like unxz, also supports multiple formats */ + if (exeNameMatch(programName, ZSTD_LZ4)) { suffix = LZ4_EXTENSION; cType = FIO_lz4Compression; } /* behave like lz4 */ + if (exeNameMatch(programName, ZSTD_UNLZ4)) { operation=zom_decompress; cType = FIO_lz4Compression; } /* behave like unlz4, also supports multiple formats */ memset(&compressionParams, 0, sizeof(compressionParams)); /* init crash handler */ @@ -905,43 +959,55 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--help")) { usage_advanced(programName); CLEAN_RETURN(0); } if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; } if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; } - if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; g_displayLevel-=(g_displayLevel==2); continue; } + if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; removeSrcFile=0; continue; } if (!strcmp(argument, "--ultra")) { ultra=1; continue; } if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(prefs, 2); continue; } if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(prefs, 0); continue; } if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(prefs, 2); continue; } if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(prefs, 0); continue; } + if (!strcmp(argument, "--pass-through")) { FIO_setPassThroughFlag(prefs, 1); continue; } + if (!strcmp(argument, "--no-pass-through")) { FIO_setPassThroughFlag(prefs, 0); continue; } if (!strcmp(argument, "--test")) { operation=zom_test; continue; } + if (!strcmp(argument, "--asyncio")) { FIO_setAsyncIOFlag(prefs, 1); continue;} + if (!strcmp(argument, "--no-asyncio")) { FIO_setAsyncIOFlag(prefs, 0); continue;} if (!strcmp(argument, "--train")) { operation=zom_train; if (outFileName==NULL) outFileName=g_defaultDictName; continue; } if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(prefs, 0); continue; } - if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; } - if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; } + if (!strcmp(argument, "--keep")) { removeSrcFile=0; continue; } + if (!strcmp(argument, "--rm")) { removeSrcFile=1; continue; } if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; } if (!strcmp(argument, "--show-default-cparams")) { showDefaultCParams = 1; continue; } if (!strcmp(argument, "--content-size")) { contentSize = 1; continue; } if (!strcmp(argument, "--no-content-size")) { contentSize = 0; continue; } if (!strcmp(argument, "--adapt")) { adapt = 1; continue; } - if (!strcmp(argument, "--no-row-match-finder")) { useRowMatchFinder = 1; continue; } - if (!strcmp(argument, "--row-match-finder")) { useRowMatchFinder = 2; continue; } + if (!strcmp(argument, "--no-row-match-finder")) { useRowMatchFinder = ZSTD_ps_disable; continue; } + if (!strcmp(argument, "--row-match-finder")) { useRowMatchFinder = ZSTD_ps_enable; continue; } if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) { badusage(programName); CLEAN_RETURN(1); } continue; } if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; } - if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(prefs, FIO_zstdCompression); continue; } + if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; cType = FIO_zstdCompression; continue; } #ifdef ZSTD_GZCOMPRESS - if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); continue; } + if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; cType = FIO_gzipCompression; continue; } + if (exeNameMatch(programName, ZSTD_GZ)) { /* behave like gzip */ + if (!strcmp(argument, "--best")) { dictCLevel = cLevel = 9; continue; } + if (!strcmp(argument, "--no-name")) { /* ignore for now */; continue; } + } #endif #ifdef ZSTD_LZMACOMPRESS - if (!strcmp(argument, "--format=lzma")) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression); continue; } - if (!strcmp(argument, "--format=xz")) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); continue; } + if (!strcmp(argument, "--format=lzma")) { suffix = LZMA_EXTENSION; cType = FIO_lzmaCompression; continue; } + if (!strcmp(argument, "--format=xz")) { suffix = XZ_EXTENSION; cType = FIO_xzCompression; continue; } #endif #ifdef ZSTD_LZ4COMPRESS - if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); continue; } + if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; cType = FIO_lz4Compression; continue; } #endif if (!strcmp(argument, "--rsyncable")) { rsyncable = 1; continue; } if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_ps_enable; continue; } if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_ps_disable; continue; } - if (!strcmp(argument, "--no-progress")) { FIO_setProgressSetting(FIO_ps_never); continue; } - if (!strcmp(argument, "--progress")) { FIO_setProgressSetting(FIO_ps_always); continue; } + if (!strcmp(argument, "--no-progress")) { progress = FIO_ps_never; continue; } + if (!strcmp(argument, "--progress")) { progress = FIO_ps_always; continue; } if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; } + if (!strcmp(argument, "--fake-stdin-is-console")) { UTIL_fakeStdinIsConsole(); continue; } + if (!strcmp(argument, "--fake-stdout-is-console")) { UTIL_fakeStdoutIsConsole(); continue; } + if (!strcmp(argument, "--fake-stderr-is-console")) { UTIL_fakeStderrIsConsole(); continue; } + if (!strcmp(argument, "--trace-file-stat")) { UTIL_traceFileStat(); continue; } /* long commands with arguments */ #ifndef ZSTD_NODICT @@ -983,15 +1049,21 @@ int main(int argCount, const char* argv[]) if (longCommandWArg(&argument, "--memlimit")) { NEXT_UINT32(memLimit); continue; } if (longCommandWArg(&argument, "--memory")) { NEXT_UINT32(memLimit); continue; } if (longCommandWArg(&argument, "--memlimit-decompress")) { NEXT_UINT32(memLimit); continue; } - if (longCommandWArg(&argument, "--block-size=")) { blockSize = readSizeTFromChar(&argument); continue; } + if (longCommandWArg(&argument, "--block-size")) { NEXT_TSIZE(blockSize); continue; } if (longCommandWArg(&argument, "--maxdict")) { NEXT_UINT32(maxDictSize); continue; } if (longCommandWArg(&argument, "--dictID")) { NEXT_UINT32(dictID); continue; } - if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badusage(programName); CLEAN_RETURN(1); } continue; } - if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readSizeTFromChar(&argument); continue; } - if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readSizeTFromChar(&argument); continue; } - if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readSizeTFromChar(&argument); continue; } - if (longCommandWArg(&argument, "--output-dir-flat")) { NEXT_FIELD(outDirName); continue; } -#ifdef ZSTD_MULTITHREAD + if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badusage(programName); CLEAN_RETURN(1); } ; cType = FIO_zstdCompression; continue; } + if (longCommandWArg(&argument, "--stream-size")) { NEXT_TSIZE(streamSrcSize); continue; } + if (longCommandWArg(&argument, "--target-compressed-block-size")) { NEXT_TSIZE(targetCBlockSize); continue; } + if (longCommandWArg(&argument, "--size-hint")) { NEXT_TSIZE(srcSizeHint); continue; } + if (longCommandWArg(&argument, "--output-dir-flat")) { + NEXT_FIELD(outDirName); + if (strlen(outDirName) == 0) { + DISPLAYLEVEL(1, "error: output dir cannot be empty string (did you mean to pass '.' instead?)\n"); + CLEAN_RETURN(1); + } + continue; + } if (longCommandWArg(&argument, "--auto-threads")) { const char* threadDefault = NULL; NEXT_FIELD(threadDefault); @@ -999,9 +1071,15 @@ int main(int argCount, const char* argv[]) defaultLogicalCores = 1; continue; } -#endif #ifdef UTIL_HAS_MIRRORFILELIST - if (longCommandWArg(&argument, "--output-dir-mirror")) { NEXT_FIELD(outMirroredDirName); continue; } + if (longCommandWArg(&argument, "--output-dir-mirror")) { + NEXT_FIELD(outMirroredDirName); + if (strlen(outMirroredDirName) == 0) { + DISPLAYLEVEL(1, "error: output dir cannot be empty string (did you mean to pass '.' instead?)\n"); + CLEAN_RETURN(1); + } + continue; + } #endif #ifndef ZSTD_NOTRACE if (longCommandWArg(&argument, "--trace")) { char const* traceFile; NEXT_FIELD(traceFile); TRACE_enable(traceFile); continue; } @@ -1018,6 +1096,8 @@ int main(int argCount, const char* argv[]) /* Invalid character following --long */ badusage(programName); CLEAN_RETURN(1); + } else { + ldmWindowLog = g_defaultMaxWindowLog; } /* Only set windowLog if not already set by --zstd */ if (compressionParams.windowLog == 0) @@ -1075,8 +1155,8 @@ int main(int argCount, const char* argv[]) { /* Display help */ case 'V': printVersion(); CLEAN_RETURN(0); /* Version Only */ - case 'H': - case 'h': usage_advanced(programName); CLEAN_RETURN(0); + case 'H': usage_advanced(programName); CLEAN_RETURN(0); + case 'h': usage(stdout, programName); CLEAN_RETURN(0); /* Compress */ case 'z': operation=zom_compress; argument++; break; @@ -1090,7 +1170,10 @@ int main(int argCount, const char* argv[]) operation=zom_decompress; argument++; break; /* Force stdout, even if stdout==console */ - case 'c': forceStdout=1; outFileName=stdoutmark; argument++; break; + case 'c': forceStdout=1; outFileName=stdoutmark; removeSrcFile=0; argument++; break; + + /* do not store filename - gzip compatibility - nothing to do */ + case 'n': argument++; break; /* Use file content as dictionary */ case 'D': argument++; NEXT_FIELD(dictFileName); break; @@ -1105,7 +1188,7 @@ int main(int argCount, const char* argv[]) case 'q': g_displayLevel--; argument++; break; /* keep source file (default) */ - case 'k': FIO_setRemoveSrcFile(prefs, 0); argument++; break; + case 'k': removeSrcFile=0; argument++; break; /* Checksum */ case 'C': FIO_setChecksumFlag(prefs, 2); argument++; break; @@ -1164,7 +1247,7 @@ int main(int argCount, const char* argv[]) /* nb of threads (hidden option) */ case 'T': argument++; - nbWorkers = (int)readU32FromChar(&argument); + nbWorkers = readU32FromChar(&argument); break; /* Dictionary Selection level */ @@ -1204,18 +1287,21 @@ int main(int argCount, const char* argv[]) DISPLAYLEVEL(3, WELCOME_MESSAGE); #ifdef ZSTD_MULTITHREAD + if ((operation==zom_decompress) && (!singleThread) && (nbWorkers > 1)) { + DISPLAYLEVEL(2, "Warning : decompression does not support multi-threading\n"); + } if ((nbWorkers==0) && (!singleThread)) { /* automatically set # workers based on # of reported cpus */ if (defaultLogicalCores) { - nbWorkers = UTIL_countLogicalCores(); + nbWorkers = (unsigned)UTIL_countLogicalCores(); DISPLAYLEVEL(3, "Note: %d logical core(s) detected \n", nbWorkers); } else { - nbWorkers = UTIL_countPhysicalCores(); + nbWorkers = (unsigned)UTIL_countPhysicalCores(); DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbWorkers); } } #else - (void)singleThread; (void)nbWorkers; + (void)singleThread; (void)nbWorkers; (void)defaultLogicalCores; #endif g_utilDisplayLevel = g_displayLevel; @@ -1251,6 +1337,8 @@ int main(int argCount, const char* argv[]) } } + nbInputFileNames = filenames->tableSize; /* saving number of input files */ + if (recursive) { /* at this stage, filenameTable is a list of paths, which can contain both files and directories */ UTIL_expandFNT(&filenames, followLinks); } @@ -1263,7 +1351,7 @@ int main(int argCount, const char* argv[]) int const ret = FIO_listMultipleFiles((unsigned)filenames->tableSize, filenames->fileNames, g_displayLevel); CLEAN_RETURN(ret); #else - DISPLAY("file information is not supported \n"); + DISPLAYLEVEL(1, "file information is not supported \n"); CLEAN_RETURN(1); #endif } @@ -1271,14 +1359,18 @@ int main(int argCount, const char* argv[]) /* Check if benchmark is selected */ if (operation==zom_bench) { #ifndef ZSTD_NOBENCH + if (cType != FIO_zstdCompression) { + DISPLAYLEVEL(1, "benchmark mode is only compatible with zstd format \n"); + CLEAN_RETURN(1); + } benchParams.blockSize = blockSize; - benchParams.nbWorkers = nbWorkers; + benchParams.nbWorkers = (int)nbWorkers; benchParams.realTime = (unsigned)setRealTimePrio; benchParams.nbSeconds = bench_nbSeconds; benchParams.ldmFlag = ldmFlag; benchParams.ldmMinMatch = (int)g_ldmMinMatch; benchParams.ldmHashLog = (int)g_ldmHashLog; - benchParams.useRowMatchFinder = useRowMatchFinder; + benchParams.useRowMatchFinder = (int)useRowMatchFinder; if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) { benchParams.ldmBucketSizeLog = (int)g_ldmBucketSizeLog; } @@ -1299,15 +1391,18 @@ int main(int argCount, const char* argv[]) int c; DISPLAYLEVEL(3, "Benchmarking %s \n", filenames->fileNames[i]); for(c = cLevel; c <= cLevelLast; c++) { - BMK_benchFilesAdvanced(&filenames->fileNames[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams); + BMK_benchOutcome_t const bo = BMK_benchFilesAdvanced(&filenames->fileNames[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams); + if (!BMK_isSuccessful_benchOutcome(bo)) return 1; } } } else { for(; cLevel <= cLevelLast; cLevel++) { - BMK_benchFilesAdvanced(filenames->fileNames, (unsigned)filenames->tableSize, dictFileName, cLevel, &compressionParams, g_displayLevel, &benchParams); + BMK_benchOutcome_t const bo = BMK_benchFilesAdvanced(filenames->fileNames, (unsigned)filenames->tableSize, dictFileName, cLevel, &compressionParams, g_displayLevel, &benchParams); + if (!BMK_isSuccessful_benchOutcome(bo)) return 1; } } } else { for(; cLevel <= cLevelLast; cLevel++) { - BMK_syntheticTest(cLevel, compressibility, &compressionParams, g_displayLevel, &benchParams); + BMK_benchOutcome_t const bo = BMK_syntheticTest(cLevel, compressibility, &compressionParams, g_displayLevel, &benchParams); + if (!BMK_isSuccessful_benchOutcome(bo)) return 1; } } #else @@ -1349,24 +1444,34 @@ int main(int argCount, const char* argv[]) } #ifndef ZSTD_NODECOMPRESS - if (operation==zom_test) { FIO_setTestMode(prefs, 1); outFileName=nulmark; FIO_setRemoveSrcFile(prefs, 0); } /* test mode */ + if (operation==zom_test) { FIO_setTestMode(prefs, 1); outFileName=nulmark; removeSrcFile=0; } /* test mode */ #endif /* No input filename ==> use stdin and stdout */ - if (filenames->tableSize == 0) UTIL_refFilename(filenames, stdinmark); - if (!strcmp(filenames->fileNames[0], stdinmark) && !outFileName) + if (filenames->tableSize == 0) { + /* It is possible that the input + was a number of empty directories. In this case + stdin and stdout should not be used */ + if (nbInputFileNames > 0 ){ + DISPLAYLEVEL(1, "please provide correct input file(s) or non-empty directories -- ignored \n"); + CLEAN_RETURN(0); + } + UTIL_refFilename(filenames, stdinmark); + } + + if (filenames->tableSize == 1 && !strcmp(filenames->fileNames[0], stdinmark) && !outFileName) outFileName = stdoutmark; /* when input is stdin, default output is stdout */ /* Check if input/output defined as console; trigger an error in this case */ if (!forceStdin - && !strcmp(filenames->fileNames[0], stdinmark) - && IS_CONSOLE(stdin) ) { + && (UTIL_searchFileNamesTable(filenames, stdinmark) != -1) + && UTIL_isConsole(stdin) ) { DISPLAYLEVEL(1, "stdin is a console, aborting\n"); CLEAN_RETURN(1); } - if ( outFileName && !strcmp(outFileName, stdoutmark) - && IS_CONSOLE(stdout) - && !strcmp(filenames->fileNames[0], stdinmark) + if ( (!outFileName || !strcmp(outFileName, stdoutmark)) + && UTIL_isConsole(stdout) + && (UTIL_searchFileNamesTable(filenames, stdinmark) != -1) && !forceStdout && operation!=zom_decompress ) { DISPLAYLEVEL(1, "stdout is a console, aborting\n"); @@ -1384,25 +1489,35 @@ int main(int argCount, const char* argv[]) if (showDefaultCParams) { if (operation == zom_decompress) { - DISPLAY("error : can't use --show-default-cparams in decomrpession mode \n"); + DISPLAYLEVEL(1, "error : can't use --show-default-cparams in decompression mode \n"); CLEAN_RETURN(1); } } if (dictFileName != NULL && patchFromDictFileName != NULL) { - DISPLAY("error : can't use -D and --patch-from=# at the same time \n"); + DISPLAYLEVEL(1, "error : can't use -D and --patch-from=# at the same time \n"); CLEAN_RETURN(1); } if (patchFromDictFileName != NULL && filenames->tableSize > 1) { - DISPLAY("error : can't use --patch-from=# on multiple files \n"); + DISPLAYLEVEL(1, "error : can't use --patch-from=# on multiple files \n"); CLEAN_RETURN(1); } - /* No status message in pipe mode (stdin - stdout) */ + /* No status message by default when output is stdout */ hasStdout = outFileName && !strcmp(outFileName,stdoutmark); + if (hasStdout && (g_displayLevel==2)) g_displayLevel=1; + + /* when stderr is not the console, do not pollute it with progress updates (unless requested) */ + if (!UTIL_isConsole(stderr) && (progress!=FIO_ps_always)) progress=FIO_ps_never; + FIO_setProgressSetting(progress); - if ((hasStdout || !IS_CONSOLE(stderr)) && (g_displayLevel==2)) g_displayLevel=1; + /* don't remove source files when output is stdout */; + if (hasStdout && removeSrcFile) { + DISPLAYLEVEL(3, "Note: src files are not removed when output is stdout \n"); + removeSrcFile = 0; + } + FIO_setRemoveSrcFile(prefs, removeSrcFile); /* IO Stream/File */ FIO_setHasStdoutOutput(fCtx, hasStdout); @@ -1422,8 +1537,9 @@ int main(int argCount, const char* argv[]) FIO_setMemLimit(prefs, memLimit); if (operation==zom_compress) { #ifndef ZSTD_NOCOMPRESS + FIO_setCompressionType(prefs, cType); FIO_setContentSize(prefs, contentSize); - FIO_setNbWorkers(prefs, nbWorkers); + FIO_setNbWorkers(prefs, (int)nbWorkers); FIO_setBlockSize(prefs, (int)blockSize); if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(prefs, (int)g_overlapLog); FIO_setLdmFlag(prefs, (unsigned)ldmFlag); @@ -1431,8 +1547,8 @@ int main(int argCount, const char* argv[]) FIO_setLdmMinMatch(prefs, (int)g_ldmMinMatch); if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) FIO_setLdmBucketSizeLog(prefs, (int)g_ldmBucketSizeLog); if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) FIO_setLdmHashRateLog(prefs, (int)g_ldmHashRateLog); - FIO_setAdaptiveMode(prefs, (unsigned)adapt); - FIO_setUseRowMatchFinder(prefs, useRowMatchFinder); + FIO_setAdaptiveMode(prefs, adapt); + FIO_setUseRowMatchFinder(prefs, (int)useRowMatchFinder); FIO_setAdaptMin(prefs, adaptMin); FIO_setAdaptMax(prefs, adaptMax); FIO_setRsyncable(prefs, rsyncable); @@ -1440,6 +1556,7 @@ int main(int argCount, const char* argv[]) FIO_setTargetCBlockSize(prefs, targetCBlockSize); FIO_setSrcSizeHint(prefs, srcSizeHint); FIO_setLiteralCompressionMode(prefs, literalCompressionMode); + FIO_setSparseWrite(prefs, 0); if (adaptMin > cLevel) cLevel = adaptMin; if (adaptMax < cLevel) cLevel = adaptMax; @@ -1465,8 +1582,12 @@ int main(int argCount, const char* argv[]) else operationResult = FIO_compressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams); #else - (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; (void)useRowMatchFinder; /* not used when ZSTD_NOCOMPRESS set */ - DISPLAY("Compression not supported \n"); + /* these variables are only used when compression mode is enabled */ + (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; + (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; + (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; + (void)ZSTD_strategyMap; (void)useRowMatchFinder; (void)cType; + DISPLAYLEVEL(1, "Compression not supported \n"); #endif } else { /* decompression or test */ #ifndef ZSTD_NODECOMPRESS @@ -1476,7 +1597,7 @@ int main(int argCount, const char* argv[]) operationResult = FIO_decompressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, dictFileName); } #else - DISPLAY("Decompression not supported \n"); + DISPLAYLEVEL(1, "Decompression not supported \n"); #endif } diff --git a/contrib/libs/zstd/programs/zstdcli_trace.c b/contrib/libs/zstd/programs/zstdcli_trace.c index b3b977feb5..35075a52c4 100644 --- a/contrib/libs/zstd/programs/zstdcli_trace.c +++ b/contrib/libs/zstd/programs/zstdcli_trace.c @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/contrib/libs/zstd/programs/zstdcli_trace.h b/contrib/libs/zstd/programs/zstdcli_trace.h index 38c27dc04c..9c135d3ca8 100644 --- a/contrib/libs/zstd/programs/zstdcli_trace.h +++ b/contrib/libs/zstd/programs/zstdcli_trace.h @@ -1,5 +1,5 @@ /* - * Copyright (c) Facebook, Inc. + * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the |