diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/zstd/programs | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/zstd/programs')
20 files changed, 9868 insertions, 0 deletions
diff --git a/contrib/libs/zstd/programs/README.md b/contrib/libs/zstd/programs/README.md new file mode 100644 index 0000000000..5570f90c3b --- /dev/null +++ b/contrib/libs/zstd/programs/README.md @@ -0,0 +1,301 @@ +Command Line Interface for Zstandard library +============================================ + +Command Line Interface (CLI) can be created using the `make` command without any additional parameters. +There are however other Makefile targets that create different variations of CLI: +- `zstd` : default CLI supporting gzip-like arguments; includes dictionary builder, benchmark, and supports decompression of legacy zstd formats +- `zstd_nolegacy` : Same as `zstd` but without support for legacy zstd formats +- `zstd-small` : CLI optimized for minimal size; no dictionary builder, no benchmark, and no support for legacy zstd formats +- `zstd-compress` : version of CLI which can only compress into zstd format +- `zstd-decompress` : version of CLI which can only decompress zstd format + + +### Compilation variables +`zstd` scope can be altered by modifying the following `make` variables : + +- __HAVE_THREAD__ : multithreading is automatically enabled when `pthread` is detected. + It's possible to disable multithread support, by setting `HAVE_THREAD=0`. + Example : `make zstd HAVE_THREAD=0` + It's also possible to force multithread support, using `HAVE_THREAD=1`. + In which case, linking stage will fail if neither `pthread` nor `windows.h` library can be found. + This is useful to ensure this feature is not silently disabled. + +- __ZSTD_LEGACY_SUPPORT__ : `zstd` can decompress files compressed by older versions of `zstd`. + Starting v0.8.0, all versions of `zstd` produce frames compliant with the [specification](../doc/zstd_compression_format.md), and are therefore compatible. + But older versions (< v0.8.0) produced different, incompatible, frames. + By default, `zstd` supports decoding legacy formats >= v0.4.0 (`ZSTD_LEGACY_SUPPORT=4`). 
+ This can be altered by modifying this compilation variable. + `ZSTD_LEGACY_SUPPORT=1` means "support all formats >= v0.1.0". + `ZSTD_LEGACY_SUPPORT=2` means "support all formats >= v0.2.0", and so on. + `ZSTD_LEGACY_SUPPORT=0` means _DO NOT_ support any legacy format. + if `ZSTD_LEGACY_SUPPORT >= 8`, it's the same as `0`, since there is no legacy format after `7`. + Note : `zstd` only supports decoding older formats, and cannot generate any legacy format. + +- __HAVE_ZLIB__ : `zstd` can compress and decompress files in `.gz` format. + This is ordered through command `--format=gzip`. + Alternatively, symlinks named `gzip` or `gunzip` will mimic intended behavior. + `.gz` support is automatically enabled when `zlib` library is detected at build time. + It's possible to disable `.gz` support, by setting `HAVE_ZLIB=0`. + Example : `make zstd HAVE_ZLIB=0` + It's also possible to force compilation with zlib support, using `HAVE_ZLIB=1`. + In which case, linking stage will fail if `zlib` library cannot be found. + This is useful to prevent silent feature disabling. + +- __HAVE_LZMA__ : `zstd` can compress and decompress files in `.xz` and `.lzma` formats. + This is ordered through commands `--format=xz` and `--format=lzma` respectively. + Alternatively, symlinks named `xz`, `unxz`, `lzma`, or `unlzma` will mimic intended behavior. + `.xz` and `.lzma` support is automatically enabled when `lzma` library is detected at build time. + It's possible to disable `.xz` and `.lzma` support, by setting `HAVE_LZMA=0`. + Example : `make zstd HAVE_LZMA=0` + It's also possible to force compilation with lzma support, using `HAVE_LZMA=1`. + In which case, linking stage will fail if `lzma` library cannot be found. + This is useful to prevent silent feature disabling. + +- __HAVE_LZ4__ : `zstd` can compress and decompress files in `.lz4` formats. + This is ordered through commands `--format=lz4`. + Alternatively, symlinks named `lz4`, or `unlz4` will mimic intended behavior. 
+ `.lz4` support is automatically enabled when `lz4` library is detected at build time. + It's possible to disable `.lz4` support, by setting `HAVE_LZ4=0` . + Example : `make zstd HAVE_LZ4=0` + It's also possible to force compilation with lz4 support, using `HAVE_LZ4=1`. + In which case, linking stage will fail if `lz4` library cannot be found. + This is useful to prevent silent feature disabling. + +- __ZSTD_NOBENCH__ : `zstd` cli will be compiled without its integrated benchmark module. + This can be useful to produce smaller binaries. + In this case, the corresponding unit can also be excluded from compilation target. + +- __ZSTD_NODICT__ : `zstd` cli will be compiled without support for the integrated dictionary builder. + This can be useful to produce smaller binaries. + In this case, the corresponding unit can also be excluded from compilation target. + +- __ZSTD_NOCOMPRESS__ : `zstd` cli will be compiled without support for compression. + The resulting binary will only be able to decompress files. + This can be useful to produce smaller binaries. + A corresponding `Makefile` target using this ability is `zstd-decompress`. + +- __ZSTD_NODECOMPRESS__ : `zstd` cli will be compiled without support for decompression. + The resulting binary will only be able to compress files. + This can be useful to produce smaller binaries. + A corresponding `Makefile` target using this ability is `zstd-compress`. + +- __BACKTRACE__ : `zstd` can display a stack backtrace when execution + generates a runtime exception. By default, this feature may be + degraded/disabled on some platforms unless additional compiler directives are + applied. When triaging a runtime issue, enabling this feature can provide + more context to determine the location of the fault. + Example : `make zstd BACKTRACE=1` + + +### Aggregation of parameters +CLI supports aggregation of parameters i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`. 
+ + +### Symlink shortcuts +It's possible to invoke `zstd` through a symlink. +When the name of the symlink has a specific value, it triggers an associated behavior. +- `zstdmt` : compress using all cores available on local system. +- `zcat` : will decompress and output target file using any of the supported formats. `gzcat` and `zstdcat` are also equivalent. +- `gzip` : if zlib support is enabled, will mimic `gzip` by compressing file using `.gz` format, removing source file by default (use `--keep` to preserve). If zlib is not supported, triggers an error. +- `xz` : if lzma support is enabled, will mimic `xz` by compressing file using `.xz` format, removing source file by default (use `--keep` to preserve). If xz is not supported, triggers an error. +- `lzma` : if lzma support is enabled, will mimic `lzma` by compressing file using `.lzma` format, removing source file by default (use `--keep` to preserve). If lzma is not supported, triggers an error. +- `lz4` : if lz4 support is enabled, will mimic `lz4` by compressing file using `.lz4` format. If lz4 is not supported, triggers an error. +- `unzstd` and `unlz4` will decompress any of the supported formats. +- `ungz`, `unxz` and `unlzma` will do the same, and will also remove source file by default (use `--keep` to preserve). + + +### Dictionary builder in Command Line Interface +Zstd offers a training mode, which can be used to tune the algorithm for a selected +type of data, by providing it with a few samples. The result of the training is stored +in a file selected with the `-o` option (default name is `dictionary`), +which can be loaded before compression and decompression. + +Using a dictionary, the compression ratio achievable on small data improves dramatically. +These compression gains are achieved while simultaneously providing faster compression and decompression speeds. +Dictionaries work if there is some correlation in a family of small data (there is no universal dictionary).
+Hence, deploying one dictionary per type of data will provide the greatest benefits. +Dictionary gains are mostly effective in the first few KB. Then, the compression algorithm +will rely more and more on previously decoded content to compress the rest of the file. + +Usage of the dictionary builder and created dictionaries with CLI: + +1. Create the dictionary : `zstd --train PathToTrainingSet/* -o dictionaryName` +2. Compress with the dictionary: `zstd FILE -D dictionaryName` +3. Decompress with the dictionary: `zstd --decompress FILE.zst -D dictionaryName` + + +### Benchmark in Command Line Interface +The CLI includes an in-memory compression benchmark module for zstd. +The benchmark is conducted using given filenames. The files are read into memory and joined together. +This makes the benchmark more precise, as it eliminates I/O overhead. +Multiple filenames can be supplied, as multiple parameters, with wildcards, +or names of directories can be used as parameters with the `-r` option. + +The benchmark measures ratio, compressed size, compression and decompression speed. +One can select compression levels starting from `-b` and ending with `-e`. +The `-i` parameter selects the minimal time used for each of the tested levels.
+ + +### Usage of Command Line Interface +The full list of options can be obtained with `-h` or `-H` parameter: +``` +Usage : + zstd [args] [FILE(s)] [-o file] + +FILE : a filename + with no FILE, or when FILE is - , read standard input +Arguments : + -# : # compression level (1-19, default: 3) + -d : decompression + -D DICT: use DICT as Dictionary for compression or decompression + -o file: result stored into `file` (only 1 output file) + -f : overwrite output without prompting, also (de)compress links +--rm : remove source file(s) after successful de/compression + -k : preserve source file(s) (default) + -h/-H : display help/long help and exit + +Advanced arguments : + -V : display Version number and exit + -c : write to standard output (even if it is the console) + -v : verbose mode; specify multiple times to increase verbosity + -q : suppress warnings; specify twice to suppress errors too +--no-progress : do not display the progress counter + -r : operate recursively on directories +--filelist FILE : read list of files to operate upon from FILE +--output-dir-flat DIR : processed files are stored into DIR +--output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure +--[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled). If specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate). 
+-- : All arguments after "--" are treated as files + +Advanced compression arguments : +--ultra : enable levels beyond 19, up to 22 (requires more memory) +--long[=#]: enable long distance matching with given window log (default: 27) +--fast[=#]: switch to very fast compression levels (default: 1) +--adapt : dynamically adapt compression level to I/O conditions +--patch-from=FILE : specify the file to be used as a reference point for zstd's diff engine + -T# : spawns # compression threads (default: 1, 0==# cores) + -B# : select size of each job (default: 0==automatic) +--single-thread : use a single thread for both I/O and compression (result slightly different than -T1) +--rsyncable : compress using a rsync-friendly method (-B sets block size) +--exclude-compressed: only compress files that are not already compressed +--stream-size=# : specify size of streaming input from `stdin` +--size-hint=# optimize compression parameters for streaming input of approximately this size +--target-compressed-block-size=# : generate compressed block of approximately targeted size +--no-dictID : don't write dictID into header (dictionary compression only) +--[no-]compress-literals : force (un)compressed literals +--format=zstd : compress files to the .zst format (default) +--format=gzip : compress files to the .gz format +--format=xz : compress files to the .xz format +--format=lzma : compress files to the .lzma format +--format=lz4 : compress files to the .lz4 format + +Advanced decompression arguments : + -l : print information about zstd compressed files +--test : test compressed file integrity + -M# : Set a memory usage limit for decompression +--[no-]sparse : sparse mode (default: disabled) + +Dictionary builder : +--train ## : create a dictionary from a training set of files +--train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args +--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]] : use the fast cover algorithm with 
optional args +--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: 9) + -o DICT : DICT is dictionary name (default: dictionary) +--maxdict=# : limit dictionary to specified size (default: 112640) +--dictID=# : force dictionary ID to specified value (default: random) + +Benchmark arguments : + -b# : benchmark file(s), using # compression level (default: 3) + -e# : test all compression levels successively from -b# to -e# (default: 1) + -i# : minimum evaluation time in seconds (default: 3s) + -B# : cut file into independent blocks of size # (default: no block) + -S : output one benchmark result per input file (default: consolidated result) +--priority=rt : set process priority to real-time +``` + +### Passing parameters through Environment Variables +There is no "generic" way to pass "any kind of parameter" to `zstd` in a pass-through manner. +Using environment variables for this purpose has security implications. +Therefore, this avenue is intentionally restricted and only supports `ZSTD_CLEVEL` and `ZSTD_NBTHREADS`. + +`ZSTD_CLEVEL` can be used to modify the default compression level of `zstd` +(usually set to `3`) to another value between 1 and 19 (the "normal" range). + +`ZSTD_NBTHREADS` can be used to specify a number of threads +that `zstd` will use for compression, which by default is `1`. +This functionality only exists when `zstd` is compiled with multithread support. +`0` means "use as many threads as detected cpu cores on local system". +The max # of threads is capped at `ZSTDMT_NBWORKERS_MAX`, +which is either 64 in 32-bit mode, or 256 for 64-bit environments. + +This functionality can be useful when `zstd` CLI is invoked in a way that doesn't allow passing arguments. +One such scenario is `tar --zstd`. 
+As `ZSTD_CLEVEL` and `ZSTD_NBTHREADS` only replace the default compression level +and number of threads respectively, they can both be overridden by corresponding command line arguments: +`-#` for compression level and `-T#` for number of threads. + + +### Long distance matching mode +The long distance matching mode, enabled with `--long`, is designed to improve +the compression ratio for files with long matches at a large distance (up to the +maximum window size, `128 MiB`) while still maintaining compression speed. + +Enabling this mode sets the window size to `128 MiB` and thus increases the memory +usage for both the compressor and decompressor. Performance in terms of speed is +dependent on long matches being found. Compression speed may degrade if few long +matches are found. Decompression speed usually improves when there are many long +distance matches. + +Below are graphs comparing the compression speed, compression ratio, and +decompression speed with and without long distance matching on an ideal use +case: a tar of four versions of clang (versions `3.4.1`, `3.4.2`, `3.5.0`, +`3.5.1`) with a total size of `244889600 B`. This is an ideal use case as there +are many long distance matches within the maximum window size of `128 MiB` (each +version is less than `128 MiB`). 
+ +Compression Speed vs Ratio | Decompression Speed +---------------------------|--------------------- +![Compression Speed vs Ratio](https://raw.githubusercontent.com/facebook/zstd/v1.3.3/doc/images/ldmCspeed.png "Compression Speed vs Ratio") | ![Decompression Speed](https://raw.githubusercontent.com/facebook/zstd/v1.3.3/doc/images/ldmDspeed.png "Decompression Speed") + +| Method | Compression ratio | Compression speed | Decompression speed | +|:-------|------------------:|-------------------------:|---------------------------:| +| `zstd -1` | `5.065` | `284.8 MB/s` | `759.3 MB/s` | +| `zstd -5` | `5.826` | `124.9 MB/s` | `674.0 MB/s` | +| `zstd -10` | `6.504` | `29.5 MB/s` | `771.3 MB/s` | +| `zstd -1 --long` | `17.426` | `220.6 MB/s` | `1638.4 MB/s` | +| `zstd -5 --long` | `19.661` | `165.5 MB/s` | `1530.6 MB/s` | +| `zstd -10 --long`| `21.949` | `75.6 MB/s` | `1632.6 MB/s` | + +On this file, the compression ratio improves significantly with minimal impact +on compression speed, and the decompression speed doubles. + +On the other extreme, compressing a file with few long distance matches (such as +the [Silesia compression corpus]) will likely lead to a deterioration in +compression speed (for lower levels) with minimal change in compression ratio. + +The below table illustrates this on the [Silesia compression corpus]. 
+ +[Silesia compression corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia + +| Method | Compression ratio | Compression speed | Decompression speed | +|:-------|------------------:|------------------:|---------------------:| +| `zstd -1` | `2.878` | `231.7 MB/s` | `594.4 MB/s` | +| `zstd -1 --long` | `2.929` | `106.5 MB/s` | `517.9 MB/s` | +| `zstd -5` | `3.274` | `77.1 MB/s` | `464.2 MB/s` | +| `zstd -5 --long` | `3.319` | `51.7 MB/s` | `371.9 MB/s` | +| `zstd -10` | `3.523` | `16.4 MB/s` | `489.2 MB/s` | +| `zstd -10 --long`| `3.566` | `16.2 MB/s` | `415.7 MB/s` | + + +### zstdgrep + +`zstdgrep` is a utility which makes it possible to `grep` directly a `.zst` compressed file. +It's used the same way as normal `grep`, for example : +`zstdgrep pattern file.zst` + +`zstdgrep` is _not_ compatible with dictionary compression. + +To search into a file compressed with a dictionary, +it's necessary to decompress it using `zstd` or `zstdcat`, +and then pipe the result to `grep`. For example : +`zstdcat -D dictionary -qc -- file.zst | grep pattern` diff --git a/contrib/libs/zstd/programs/benchfn.c b/contrib/libs/zstd/programs/benchfn.c new file mode 100644 index 0000000000..1aadbdd913 --- /dev/null +++ b/contrib/libs/zstd/programs/benchfn.c @@ -0,0 +1,256 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + + +/* ************************************* +* Includes +***************************************/ +#include <stdlib.h> /* malloc, free */ +#include <string.h> /* memset */ +#include <assert.h> /* assert */ + +#include "timefn.h" /* UTIL_time_t, UTIL_getTime */ +#include "benchfn.h" + + +/* ************************************* +* Constants +***************************************/ +#define TIMELOOP_MICROSEC SEC_TO_MICRO /* 1 second */ +#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */ + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + + +/* ************************************* +* Debug errors +***************************************/ +#if defined(DEBUG) && (DEBUG >= 1) +# include <stdio.h> /* fprintf */ +# define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +# define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } +#else +# define DEBUGOUTPUT(...) +#endif + + +/* error without displaying */ +#define RETURN_QUIET_ERROR(retValue, ...) { \ + DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ + DEBUGOUTPUT("Error : "); \ + DEBUGOUTPUT(__VA_ARGS__); \ + DEBUGOUTPUT(" \n"); \ + return retValue; \ +} + +/* Abort execution if a condition is not met */ +#define CONTROL(c) { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } } + + +/* ************************************* +* Benchmarking an arbitrary function +***************************************/ + +int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome) +{ + return outcome.error_tag_never_ever_use_directly == 0; +} + +/* warning : this function will stop program execution if outcome is invalid ! 
+ * check outcome validity first, using BMK_isValid_runResult() */ +BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome) +{ + CONTROL(outcome.error_tag_never_ever_use_directly == 0); + return outcome.internal_never_ever_use_directly; +} + +size_t BMK_extract_errorResult(BMK_runOutcome_t outcome) +{ + CONTROL(outcome.error_tag_never_ever_use_directly != 0); + return outcome.error_result_never_ever_use_directly; +} + +static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult) +{ + BMK_runOutcome_t b; + memset(&b, 0, sizeof(b)); + b.error_tag_never_ever_use_directly = 1; + b.error_result_never_ever_use_directly = errorResult; + return b; +} + +static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime) +{ + BMK_runOutcome_t outcome; + outcome.error_tag_never_ever_use_directly = 0; + outcome.internal_never_ever_use_directly = runTime; + return outcome; +} + + +/* initFn will be measured once, benchFn will be measured `nbLoops` times */ +/* initFn is optional, provide NULL if none */ +/* benchFn must return a size_t value that errorFn can interpret */ +/* takes # of blocks and list of size & stuff for each. */ +/* can report result of benchFn for each block into blockResult. 
*/ +/* blockResult is optional, provide NULL if this information is not required */ +/* note : time per loop can be reported as zero if run time < timer resolution */ +BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p, + unsigned nbLoops) +{ + size_t dstSize = 0; + nbLoops += !nbLoops; /* minimum nbLoops is 1 */ + + /* init */ + { size_t i; + for(i = 0; i < p.blockCount; i++) { + memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */ + } } + + /* benchmark */ + { UTIL_time_t const clockStart = UTIL_getTime(); + unsigned loopNb, blockNb; + if (p.initFn != NULL) p.initFn(p.initPayload); + for (loopNb = 0; loopNb < nbLoops; loopNb++) { + for (blockNb = 0; blockNb < p.blockCount; blockNb++) { + size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb], + p.dstBuffers[blockNb], p.dstCapacities[blockNb], + p.benchPayload); + if (loopNb == 0) { /* results are recorded, checked by errorFn and summed during the first loop only */ + if (p.blockResults != NULL) p.blockResults[blockNb] = res; + if ((p.errorFn != NULL) && (p.errorFn(res))) { + RETURN_QUIET_ERROR(BMK_runOutcome_error(res), + "Function benchmark failed on block %u (of size %u) with error %i", + blockNb, (unsigned)p.srcSizes[blockNb], (int)res); + } + dstSize += res; + } } + } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */ + + { PTime const totalTime = UTIL_clockSpanNano(clockStart); + BMK_runTime_t rt; + rt.nanoSecPerRun = (double)totalTime / nbLoops; + rt.sumOfReturn = dstSize; + return BMK_setValid_runTime(rt); + } } +} + + +/* ==== Benchmarking any function, providing intermediate results ==== */ + +struct BMK_timedFnState_s { + PTime timeSpent_ns; + PTime timeBudget_ns; + PTime runBudget_ns; + BMK_runTime_t fastestRun; + unsigned nbLoops; + UTIL_time_t coolTime; +}; /* typedef'd to BMK_timedFnState_t within benchfn.h */ + +BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms) +{ + BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r)); + if (r == NULL) return NULL; /* malloc() error */ +
BMK_resetTimedFnState(r, total_ms, run_ms); + return r; +} + +void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); } + +BMK_timedFnState_t* +BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms) +{ + typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */ + typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */ + size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */ + BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer; + if (buffer == NULL) return NULL; + if (size < sizeof(struct BMK_timedFnState_s)) return NULL; + if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */ + BMK_resetTimedFnState(r, total_ms, run_ms); + return r; +} + +/* total_ms and run_ms are raised to 1 when 0, and run_ms is capped at total_ms */ +void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms) +{ + if (!total_ms) total_ms = 1 ; + if (!run_ms) run_ms = 1; + if (run_ms > total_ms) run_ms = total_ms; + timedFnState->timeSpent_ns = 0; + timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000; + timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000; + timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */ + timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL); + timedFnState->nbLoops = 1; + timedFnState->coolTime = UTIL_getTime(); +} + +/* Tells if the time budget (total_ms) set in timedFnState for all runs is spent. + * note : this only compares timeSpent_ns with timeBudget_ns. + * NOTE(review) : BMK_benchTimedFn() returns a failed outcome before adding to timeSpent_ns, + * so a failed run does not, by itself, make this function return 1 - confirm against callers.
*/ +int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState) +{ + return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns); +} + + +#undef MIN +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */ + +/* Runs BMK_benchFunction() repeatedly, adjusting cont->nbLoops after each run, + * until one run lasts at least runBudget_ns / 2. + * Returns the faster of that run and cont->fastestRun, + * or the failed outcome as soon as a run fails. */ +BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont, + BMK_benchParams_t p) +{ + PTime const runBudget_ns = cont->runBudget_ns; + PTime const runTimeMin_ns = runBudget_ns / 2; + int completed = 0; + BMK_runTime_t bestRunTime = cont->fastestRun; /* note : cont->fastestRun is only read here, this function never writes it back */ + + while (!completed) { + BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops); + + if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */ + return runResult; + } + + { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult); + double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops; + + cont->timeSpent_ns += (unsigned long long)loopDuration_ns; + + /* estimate nbLoops for next run to last approximately runBudget_ns */ + if (loopDuration_ns > (runBudget_ns / 50)) { + double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); + cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1; + } else { + /* previous run was too short : blindly increase workload by x multiplier */ + const unsigned multiplier = 10; + assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ + cont->nbLoops *= multiplier; + } + + if(loopDuration_ns < runTimeMin_ns) { + /* don't report results for which benchmark run time was too small : increased risks of rounding errors */ + assert(completed == 0); + continue; + } else { + if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) { + bestRunTime = newRunTime; + } + completed = 1; + } + } + } /* while (!completed) */ + + return BMK_setValid_runTime(bestRunTime); +} diff --git a/contrib/libs/zstd/programs/benchfn.h b/contrib/libs/zstd/programs/benchfn.h new file mode 100644 index 0000000000..590f292eaa --- /dev/null +++
b/contrib/libs/zstd/programs/benchfn.h @@ -0,0 +1,183 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* benchfn : + * benchmark any function on a set of input + * providing result in nanoSecPerRun + * or detecting and returning an error + */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef BENCH_FN_H_23876 +#define BENCH_FN_H_23876 + +/* === Dependencies === */ +#include <stddef.h> /* size_t */ + + +/* ==== Benchmark any function, iterated on a set of blocks ==== */ + +/* BMK_runTime_t: valid result return type */ + +typedef struct { + double nanoSecPerRun; /* time per iteration (over all blocks) */ + size_t sumOfReturn; /* sum of return values */ +} BMK_runTime_t; + + +/* BMK_runOutcome_t: + * type expressing the outcome of a benchmark run by BMK_benchFunction(), + * which can be either valid or invalid. + * benchmark outcome can be invalid if errorFn is provided. + * BMK_runOutcome_t must be considered "opaque" : never access its members directly. + * Instead, use its assigned methods : + * BMK_isSuccessful_runOutcome, BMK_extract_runTime, BMK_extract_errorResult. + * The structure is only described here to allow its allocation on stack. 
*/ + +typedef struct { + BMK_runTime_t internal_never_ever_use_directly; + size_t error_result_never_ever_use_directly; + int error_tag_never_ever_use_directly; +} BMK_runOutcome_t; + + +/* prototypes for benchmarked functions */ +typedef size_t (*BMK_benchFn_t)(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload); +typedef size_t (*BMK_initFn_t)(void* initPayload); +typedef unsigned (*BMK_errorFn_t)(size_t); + + +/* BMK_benchFunction() parameters are provided via the following structure. + * A structure is preferable for readability, + * as the number of parameters required is fairly large. + * No initializer is provided, because it doesn't make sense to provide some "default" : + * all parameters must be specified by the caller. + * optional parameters are labelled explicitly, and accept value NULL when not used */ +typedef struct { + BMK_benchFn_t benchFn; /* the function to benchmark, over the set of blocks */ + void* benchPayload; /* pass custom parameters to benchFn : + * (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) */ + BMK_initFn_t initFn; /* (*initFn)(initPayload) is run once per run, at the beginning. */ + void* initPayload; /* Both arguments can be NULL, in which case nothing is run. */ + BMK_errorFn_t errorFn; /* errorFn will check each return value of benchFn over each block, to determine if it failed or not. + * errorFn can be NULL, in which case no check is performed. + * errorFn must return 0 when benchFn was successful, and >= 1 if it detects an error. + * Execution is stopped as soon as an error is detected. + * the triggering return value can be retrieved using BMK_extract_errorResult(). */ + size_t blockCount; /* number of blocks to operate benchFn on. 
+ * It's also the size of all array parameters : + * srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults */ + const void *const * srcBuffers; /* read-only array of buffers to be operated on by benchFn */ + const size_t* srcSizes; /* read-only array containing sizes of srcBuffers */ + void *const * dstBuffers; /* array of buffers to be written into by benchFn. This array is not optional, it must be provided even if unused by benchfn. */ + const size_t* dstCapacities; /* read-only array containing capacities of dstBuffers. This array must be present. */ + size_t* blockResults; /* Optional: store the return value of benchFn for each block. Use NULL if this result is not requested. */ +} BMK_benchParams_t; + + +/* BMK_benchFunction() : + * This function benchmarks benchFn and initFn, providing a result. + * + * params : see description of BMK_benchParams_t above. + * nbLoops: defines number of times benchFn is run over the full set of blocks. + * Minimum value is 1. A 0 is interpreted as a 1. + * + * @return: can express either an error or a successful result. + * Use BMK_isSuccessful_runOutcome() to check if benchmark was successful. + * If yes, extract the result with BMK_extract_runTime(), + * it will contain : + * .sumOfReturn : the sum of all return values of benchFn through all of blocks + * .nanoSecPerRun : time per run of benchFn + (time for initFn / nbLoops) + * .sumOfReturn is generally intended for functions which return a # of bytes written into dstBuffer, + * in which case, this value will be the total amount of bytes written into dstBuffer. + * + * blockResults : when provided (!= NULL), and when benchmark is successful, + * params.blockResults contains all return values of `benchFn` over all blocks. + * when provided (!= NULL), and when benchmark failed, + * params.blockResults contains return values of `benchFn` over all blocks preceding and including the failed block. 
+ */ +BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t params, unsigned nbLoops); + + + +/* check first if the benchmark was successful or not */ +int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome); + +/* If the benchmark was successful, extract the result. + * note : this function will abort() program execution if benchmark failed ! + * always check if benchmark was successful first ! + */ +BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome); + +/* when benchmark failed, it means one invocation of `benchFn` failed. + * The failure was detected by `errorFn`, operating on return values of `benchFn`. + * Returns the faulty return value. + * note : this function will abort() program execution if benchmark did not fail. + * always check if benchmark failed first ! + */ +size_t BMK_extract_errorResult(BMK_runOutcome_t outcome); + + + +/* ==== Benchmark any function, returning intermediate results ==== */ + +/* state information tracking benchmark session */ +typedef struct BMK_timedFnState_s BMK_timedFnState_t; + +/* BMK_benchTimedFn() : + * Similar to BMK_benchFunction(), most arguments being identical. + * Automatically determines `nbLoops` so that each result is regularly produced at interval of about run_ms. + * Note : minimum `nbLoops` is 1, therefore a run may last more than run_ms, and possibly even more than total_ms. 
+ * Usage - initialize timedFnState, select benchmark duration (total_ms) and each measurement duration (run_ms) + * call BMK_benchTimedFn() repeatedly, each measurement is supposed to last about run_ms + * Check if total time budget is spent or exceeded, using BMK_isCompleted_TimedFn() + */ +BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* timedFnState, + BMK_benchParams_t params); + +/* Tells if duration of all benchmark runs has exceeded total_ms + */ +int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState); + +/* BMK_createTimedFnState() and BMK_resetTimedFnState() : + * Create/Set BMK_timedFnState_t for next benchmark session, + * which shall last a minimum of total_ms milliseconds, + * producing intermediate results, paced at interval of (approximately) run_ms. + */ +BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms); +void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms); +void BMK_freeTimedFnState(BMK_timedFnState_t* state); + + +/* BMK_timedFnState_shell and BMK_initStatic_timedFnState() : + * Makes it possible to statically allocate a BMK_timedFnState_t on stack. + * BMK_timedFnState_shell is only there to allocate space, + * never ever access its members. + * BMK_initStatic_timedFnState() actually accepts any buffer. + * It will check if provided buffer is large enough and is correctly aligned, + * and will return NULL if conditions are not respected. 
+ */ +#define BMK_TIMEDFNSTATE_SIZE 64 +typedef union { + char never_access_space[BMK_TIMEDFNSTATE_SIZE]; + long long alignment_enforcer; /* must be aligned on 8-bytes boundaries */ +} BMK_timedFnState_shell; +BMK_timedFnState_t* BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms); + + +#endif /* BENCH_FN_H_23876 */ + +#if defined (__cplusplus) +} +#endif diff --git a/contrib/libs/zstd/programs/benchzstd.c b/contrib/libs/zstd/programs/benchzstd.c new file mode 100644 index 0000000000..9dc76a6f3e --- /dev/null +++ b/contrib/libs/zstd/programs/benchzstd.c @@ -0,0 +1,892 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* ************************************** +* Tuning parameters +****************************************/ +#ifndef BMK_TIMETEST_DEFAULT_S /* default minimum time per test */ +#define BMK_TIMETEST_DEFAULT_S 3 +#endif + + +/* ************************************* +* Includes +***************************************/ +#include "platform.h" /* Large Files support */ +#include "util.h" /* UTIL_getFileSize, UTIL_sleep */ +#include <stdlib.h> /* malloc, free */ +#include <string.h> /* memset, strerror */ +#include <stdio.h> /* fprintf, fopen */ +#include <errno.h> +#include <assert.h> /* assert */ + +#include "timefn.h" /* UTIL_time_t */ +#include "benchfn.h" +#include "../lib/common/mem.h" +#ifndef ZSTD_STATIC_LINKING_ONLY +#define ZSTD_STATIC_LINKING_ONLY +#endif +#include "../lib/zstd.h" +#include "datagen.h" /* RDG_genBuffer */ +#ifndef XXH_INLINE_ALL +#define XXH_INLINE_ALL +#endif +#include <contrib/libs/xxhash/xxhash.h> +#include "benchzstd.h" +#include "../lib/zstd_errors.h" + + 
+/* ************************************* +* Constants +***************************************/ +#ifndef ZSTD_GIT_COMMIT +# define ZSTD_GIT_COMMIT_STRING "" +#else +# define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT) +#endif + +#define TIMELOOP_MICROSEC (1*1000000ULL) /* 1 second */ +#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */ +#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */ +#define COOLPERIOD_SEC 10 + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BMK_RUNTEST_DEFAULT_MS 1000 + +static const size_t maxMemory = (sizeof(size_t)==4) ? + /* 32-bit */ (2 GB - 64 MB) : + /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t)*8)-31)); + + +/* ************************************* +* console display +***************************************/ +#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush(NULL); } +#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } +/* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */ +#define OUTPUT(...) { fprintf(stdout, __VA_ARGS__); fflush(NULL); } +#define OUTPUTLEVEL(l, ...) if (displayLevel>=l) { OUTPUT(__VA_ARGS__); } + + +/* ************************************* +* Exceptions +***************************************/ +#ifndef DEBUG +# define DEBUG 0 +#endif +#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } + +#define RETURN_ERROR_INT(errorNum, ...) { \ + DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "Error %i : ", errorNum); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, " \n"); \ + return errorNum; \ +} + +#define CHECK_Z(zf) { \ + size_t const zerr = zf; \ + if (ZSTD_isError(zerr)) { \ + DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ + DISPLAY("Error : "); \ + DISPLAY("%s failed : %s", \ + #zf, ZSTD_getErrorName(zerr)); \ + DISPLAY(" \n"); \ + exit(1); \ + } \ +} + +#define RETURN_ERROR(errorNum, retType, ...) 
{ \ + retType r; \ + memset(&r, 0, sizeof(retType)); \ + DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "Error %i : ", errorNum); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, " \n"); \ + r.tag = errorNum; \ + return r; \ +} + + +/* ************************************* +* Benchmark Parameters +***************************************/ + +BMK_advancedParams_t BMK_initAdvancedParams(void) { + BMK_advancedParams_t const res = { + BMK_both, /* mode */ + BMK_TIMETEST_DEFAULT_S, /* nbSeconds */ + 0, /* blockSize */ + 0, /* nbWorkers */ + 0, /* realTime */ + 0, /* additionalParam */ + 0, /* ldmFlag */ + 0, /* ldmMinMatch */ + 0, /* ldmHashLog */ + 0, /* ldmBuckSizeLog */ + 0, /* ldmHashRateLog */ + ZSTD_ps_auto, /* literalCompressionMode */ + 0 /* useRowMatchFinder */ + }; + return res; +} + + +/* ******************************************************** +* Bench functions +**********************************************************/ +typedef struct { + const void* srcPtr; + size_t srcSize; + void* cPtr; + size_t cRoom; + size_t cSize; + void* resPtr; + size_t resSize; +} blockParam_t; + +#undef MIN +#undef MAX +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#define MAX(a,b) ((a) > (b) ? 
(a) : (b)) + +static void +BMK_initCCtx(ZSTD_CCtx* ctx, + const void* dictBuffer, size_t dictBufferSize, + int cLevel, + const ZSTD_compressionParameters* comprParams, + const BMK_advancedParams_t* adv) +{ + ZSTD_CCtx_reset(ctx, ZSTD_reset_session_and_parameters); + if (adv->nbWorkers==1) { + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, 0)); + } else { + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_nbWorkers, adv->nbWorkers)); + } + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, cLevel)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_useRowMatchFinder, adv->useRowMatchFinder)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_enableLongDistanceMatching, adv->ldmFlag)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmMinMatch, adv->ldmMinMatch)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashLog, adv->ldmHashLog)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmBucketSizeLog, adv->ldmBucketSizeLog)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_ldmHashRateLog, adv->ldmHashRateLog)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_windowLog, (int)comprParams->windowLog)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_hashLog, (int)comprParams->hashLog)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_chainLog, (int)comprParams->chainLog)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_searchLog, (int)comprParams->searchLog)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_minMatch, (int)comprParams->minMatch)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_targetLength, (int)comprParams->targetLength)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_literalCompressionMode, (int)adv->literalCompressionMode)); + CHECK_Z(ZSTD_CCtx_setParameter(ctx, ZSTD_c_strategy, (int)comprParams->strategy)); + CHECK_Z(ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize)); +} + +static void BMK_initDCtx(ZSTD_DCtx* dctx, + const void* dictBuffer, size_t dictBufferSize) { + CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters)); + 
CHECK_Z(ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize)); +} + + +typedef struct { + ZSTD_CCtx* cctx; + const void* dictBuffer; + size_t dictBufferSize; + int cLevel; + const ZSTD_compressionParameters* comprParams; + const BMK_advancedParams_t* adv; +} BMK_initCCtxArgs; + +static size_t local_initCCtx(void* payload) { + BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload; + BMK_initCCtx(ag->cctx, ag->dictBuffer, ag->dictBufferSize, ag->cLevel, ag->comprParams, ag->adv); + return 0; +} + +typedef struct { + ZSTD_DCtx* dctx; + const void* dictBuffer; + size_t dictBufferSize; +} BMK_initDCtxArgs; + +static size_t local_initDCtx(void* payload) { + BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload; + BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize); + return 0; +} + + +/* `addArgs` is the context */ +static size_t local_defaultCompress( + const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstSize, + void* addArgs) +{ + ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs; + return ZSTD_compress2(cctx, dstBuffer, dstSize, srcBuffer, srcSize); +} + +/* `addArgs` is the context */ +static size_t local_defaultDecompress( + const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstCapacity, + void* addArgs) +{ + size_t moreToFlush = 1; + ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs; + ZSTD_inBuffer in; + ZSTD_outBuffer out; + in.src = srcBuffer; in.size = srcSize; in.pos = 0; + out.dst = dstBuffer; out.size = dstCapacity; out.pos = 0; + while (moreToFlush) { + if(out.pos == out.size) { + return (size_t)-ZSTD_error_dstSize_tooSmall; + } + moreToFlush = ZSTD_decompressStream(dctx, &out, &in); + if (ZSTD_isError(moreToFlush)) { + return moreToFlush; + } + } + return out.pos; + +} + + +/* ================================================================= */ +/* Benchmark Zstandard, mem-to-mem scenarios */ +/* ================================================================= */ + +int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome) +{ + 
return outcome.tag == 0; +} + +BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome) +{ + assert(outcome.tag == 0); + return outcome.internal_never_use_directly; +} + +static BMK_benchOutcome_t BMK_benchOutcome_error(void) +{ + BMK_benchOutcome_t b; + memset(&b, 0, sizeof(b)); + b.tag = 1; + return b; +} + +static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(BMK_benchResult_t result) +{ + BMK_benchOutcome_t b; + b.tag = 0; + b.internal_never_use_directly = result; + return b; +} + + +/* benchMem with no allocation */ +static BMK_benchOutcome_t +BMK_benchMemAdvancedNoAlloc( + const void** srcPtrs, size_t* srcSizes, + void** cPtrs, size_t* cCapacities, size_t* cSizes, + void** resPtrs, size_t* resSizes, + void** resultBufferPtr, void* compressedBuffer, + size_t maxCompressedSize, + BMK_timedFnState_t* timeStateCompress, + BMK_timedFnState_t* timeStateDecompress, + + const void* srcBuffer, size_t srcSize, + const size_t* fileSizes, unsigned nbFiles, + const int cLevel, + const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, + int displayLevel, const char* displayName, + const BMK_advancedParams_t* adv) +{ + size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? 
adv->blockSize : srcSize) + (!srcSize); /* avoid div by 0 */ + BMK_benchResult_t benchResult; + size_t const loadedCompressedSize = srcSize; + size_t cSize = 0; + double ratio = 0.; + U32 nbBlocks; + + assert(cctx != NULL); assert(dctx != NULL); + + /* init */ + memset(&benchResult, 0, sizeof(benchResult)); + if (strlen(displayName)>17) displayName += strlen(displayName) - 17; /* display last 17 characters */ + if (adv->mode == BMK_decodeOnly) { /* benchmark only decompression : source must be already compressed */ + const char* srcPtr = (const char*)srcBuffer; + U64 totalDSize64 = 0; + U32 fileNb; + for (fileNb=0; fileNb<nbFiles; fileNb++) { + U64 const fSize64 = ZSTD_findDecompressedSize(srcPtr, fileSizes[fileNb]); + if (fSize64==0) RETURN_ERROR(32, BMK_benchOutcome_t, "Impossible to determine original size "); + totalDSize64 += fSize64; + srcPtr += fileSizes[fileNb]; + } + { size_t const decodedSize = (size_t)totalDSize64; + assert((U64)decodedSize == totalDSize64); /* check overflow */ + free(*resultBufferPtr); + *resultBufferPtr = malloc(decodedSize); + if (!(*resultBufferPtr)) { + RETURN_ERROR(33, BMK_benchOutcome_t, "not enough memory"); + } + if (totalDSize64 > decodedSize) { /* size_t overflow */ + free(*resultBufferPtr); + RETURN_ERROR(32, BMK_benchOutcome_t, "original size is too large"); + } + cSize = srcSize; + srcSize = decodedSize; + ratio = (double)srcSize / (double)cSize; + } + } + + /* Init data blocks */ + { const char* srcPtr = (const char*)srcBuffer; + char* cPtr = (char*)compressedBuffer; + char* resPtr = (char*)(*resultBufferPtr); + U32 fileNb; + for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) { + size_t remaining = fileSizes[fileNb]; + U32 const nbBlocksforThisFile = (adv->mode == BMK_decodeOnly) ? 
1 : (U32)((remaining + (blockSize-1)) / blockSize); + U32 const blockEnd = nbBlocks + nbBlocksforThisFile; + for ( ; nbBlocks<blockEnd; nbBlocks++) { + size_t const thisBlockSize = MIN(remaining, blockSize); + srcPtrs[nbBlocks] = srcPtr; + srcSizes[nbBlocks] = thisBlockSize; + cPtrs[nbBlocks] = cPtr; + cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize); + resPtrs[nbBlocks] = resPtr; + resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize; + srcPtr += thisBlockSize; + cPtr += cCapacities[nbBlocks]; + resPtr += thisBlockSize; + remaining -= thisBlockSize; + if (adv->mode == BMK_decodeOnly) { + cSizes[nbBlocks] = thisBlockSize; + benchResult.cSize = thisBlockSize; + } } } } + + /* warming up `compressedBuffer` */ + if (adv->mode == BMK_decodeOnly) { + memcpy(compressedBuffer, srcBuffer, loadedCompressedSize); + } else { + RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1); + } + + /* Bench */ + { U64 const crcOrig = (adv->mode == BMK_decodeOnly) ? 
0 : XXH64(srcBuffer, srcSize, 0); +# define NB_MARKS 4 + const char* marks[NB_MARKS] = { " |", " /", " =", " \\" }; + U32 markNb = 0; + int compressionCompleted = (adv->mode == BMK_decodeOnly); + int decompressionCompleted = (adv->mode == BMK_compressOnly); + BMK_benchParams_t cbp, dbp; + BMK_initCCtxArgs cctxprep; + BMK_initDCtxArgs dctxprep; + + cbp.benchFn = local_defaultCompress; /* ZSTD_compress2 */ + cbp.benchPayload = cctx; + cbp.initFn = local_initCCtx; /* BMK_initCCtx */ + cbp.initPayload = &cctxprep; + cbp.errorFn = ZSTD_isError; + cbp.blockCount = nbBlocks; + cbp.srcBuffers = srcPtrs; + cbp.srcSizes = srcSizes; + cbp.dstBuffers = cPtrs; + cbp.dstCapacities = cCapacities; + cbp.blockResults = cSizes; + + cctxprep.cctx = cctx; + cctxprep.dictBuffer = dictBuffer; + cctxprep.dictBufferSize = dictBufferSize; + cctxprep.cLevel = cLevel; + cctxprep.comprParams = comprParams; + cctxprep.adv = adv; + + dbp.benchFn = local_defaultDecompress; + dbp.benchPayload = dctx; + dbp.initFn = local_initDCtx; + dbp.initPayload = &dctxprep; + dbp.errorFn = ZSTD_isError; + dbp.blockCount = nbBlocks; + dbp.srcBuffers = (const void* const *) cPtrs; + dbp.srcSizes = cSizes; + dbp.dstBuffers = resPtrs; + dbp.dstCapacities = resSizes; + dbp.blockResults = NULL; + + dctxprep.dctx = dctx; + dctxprep.dictBuffer = dictBuffer; + dctxprep.dictBufferSize = dictBufferSize; + + OUTPUTLEVEL(2, "\r%70s\r", ""); /* blank line */ + assert(srcSize < UINT_MAX); + OUTPUTLEVEL(2, "%2s-%-17.17s :%10u -> \r", marks[markNb], displayName, (unsigned)srcSize); + + while (!(compressionCompleted && decompressionCompleted)) { + if (!compressionCompleted) { + BMK_runOutcome_t const cOutcome = BMK_benchTimedFn( timeStateCompress, cbp); + + if (!BMK_isSuccessful_runOutcome(cOutcome)) { + return BMK_benchOutcome_error(); + } + + { BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome); + cSize = cResult.sumOfReturn; + ratio = (double)srcSize / (double)cSize; + { BMK_benchResult_t newResult; + 
newResult.cSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun); + benchResult.cSize = cSize; + if (newResult.cSpeed > benchResult.cSpeed) + benchResult.cSpeed = newResult.cSpeed; + } } + + { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + assert(cSize < UINT_MAX); + OUTPUTLEVEL(2, "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s \r", + marks[markNb], displayName, + (unsigned)srcSize, (unsigned)cSize, + ratioAccuracy, ratio, + benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT); + } + compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress); + } + + if(!decompressionCompleted) { + BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp); + + if(!BMK_isSuccessful_runOutcome(dOutcome)) { + return BMK_benchOutcome_error(); + } + + { BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome); + U64 const newDSpeed = (U64)((double)srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun); + if (newDSpeed > benchResult.dSpeed) + benchResult.dSpeed = newDSpeed; + } + + { int const ratioAccuracy = (ratio < 10.) ? 3 : 2; + OUTPUTLEVEL(2, "%2s-%-17.17s :%10u ->%10u (x%5.*f), %6.*f MB/s, %6.1f MB/s\r", + marks[markNb], displayName, + (unsigned)srcSize, (unsigned)cSize, + ratioAccuracy, ratio, + benchResult.cSpeed < (10 * MB_UNIT) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT, + (double)benchResult.dSpeed / MB_UNIT); + } + decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress); + } + markNb = (markNb+1) % NB_MARKS; + } /* while (!(compressionCompleted && decompressionCompleted)) */ + + /* CRC Checking */ + { const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr); + U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); + if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) { + size_t u; + DISPLAY("!!! WARNING !!! 
%14s : Invalid Checksum : %x != %x \n", + displayName, (unsigned)crcOrig, (unsigned)crcCheck); + for (u=0; u<srcSize; u++) { + if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) { + unsigned segNb, bNb, pos; + size_t bacc = 0; + DISPLAY("Decoding error at pos %u ", (unsigned)u); + for (segNb = 0; segNb < nbBlocks; segNb++) { + if (bacc + srcSizes[segNb] > u) break; + bacc += srcSizes[segNb]; + } + pos = (U32)(u - bacc); + bNb = pos / (128 KB); + DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos); + { size_t const lowest = (u>5) ? 5 : u; + size_t n; + DISPLAY("origin: "); + for (n=lowest; n>0; n--) + DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u-n]); + DISPLAY(" :%02X: ", ((const BYTE*)srcBuffer)[u]); + for (n=1; n<3; n++) + DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]); + DISPLAY(" \n"); + DISPLAY("decode: "); + for (n=lowest; n>0; n--) + DISPLAY("%02X ", resultBuffer[u-n]); + DISPLAY(" :%02X: ", resultBuffer[u]); + for (n=1; n<3; n++) + DISPLAY("%02X ", resultBuffer[u+n]); + DISPLAY(" \n"); + } + break; + } + if (u==srcSize-1) { /* should never happen */ + DISPLAY("no difference detected\n"); + } + } /* for (u=0; u<srcSize; u++) */ + } /* if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) */ + } /* CRC Checking */ + + if (displayLevel == 1) { /* hidden display mode -q, used by python speed benchmark */ + double const cSpeed = (double)benchResult.cSpeed / MB_UNIT; + double const dSpeed = (double)benchResult.dSpeed / MB_UNIT; + if (adv->additionalParam) { + OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam); + } else { + OUTPUT("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName); + } + } + + OUTPUTLEVEL(2, "%2i#\n", cLevel); + } /* Bench */ + + benchResult.cMem = (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx); + return BMK_benchOutcome_setValidResult(benchResult); +} + +BMK_benchOutcome_t 
BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstCapacity, + const size_t* fileSizes, unsigned nbFiles, + int cLevel, const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + int displayLevel, const char* displayName, const BMK_advancedParams_t* adv) + +{ + int const dstParamsError = !dstBuffer ^ !dstCapacity; /* must be both NULL or none */ + + size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; + U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; + + /* these are the blockTable parameters, just split up */ + const void ** const srcPtrs = (const void**)malloc(maxNbBlocks * sizeof(void*)); + size_t* const srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); + + + void ** const cPtrs = (void**)malloc(maxNbBlocks * sizeof(void*)); + size_t* const cSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); + size_t* const cCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); + + void ** const resPtrs = (void**)malloc(maxNbBlocks * sizeof(void*)); + size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t)); + + BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS); + BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS); + + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + + const size_t maxCompressedSize = dstCapacity ? dstCapacity : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024); + + void* const internalDstBuffer = dstBuffer ? NULL : malloc(maxCompressedSize); + void* const compressedBuffer = dstBuffer ? dstBuffer : internalDstBuffer; + + BMK_benchOutcome_t outcome = BMK_benchOutcome_error(); /* error by default */ + + void* resultBuffer = srcSize ? 
malloc(srcSize) : NULL; + + int allocationincomplete = !srcPtrs || !srcSizes || !cPtrs || + !cSizes || !cCapacities || !resPtrs || !resSizes || + !timeStateCompress || !timeStateDecompress || + !cctx || !dctx || + !compressedBuffer || !resultBuffer; + + + if (!allocationincomplete && !dstParamsError) { + outcome = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes, + cPtrs, cCapacities, cSizes, + resPtrs, resSizes, + &resultBuffer, + compressedBuffer, maxCompressedSize, + timeStateCompress, timeStateDecompress, + srcBuffer, srcSize, + fileSizes, nbFiles, + cLevel, comprParams, + dictBuffer, dictBufferSize, + cctx, dctx, + displayLevel, displayName, adv); + } + + /* clean up */ + BMK_freeTimedFnState(timeStateCompress); + BMK_freeTimedFnState(timeStateDecompress); + + ZSTD_freeCCtx(cctx); + ZSTD_freeDCtx(dctx); + + free(internalDstBuffer); + free(resultBuffer); + + free((void*)srcPtrs); + free(srcSizes); + free(cPtrs); + free(cSizes); + free(cCapacities); + free(resPtrs); + free(resSizes); + + if(allocationincomplete) { + RETURN_ERROR(31, BMK_benchOutcome_t, "allocation error : not enough memory"); + } + + if(dstParamsError) { + RETURN_ERROR(32, BMK_benchOutcome_t, "Dst parameters not coherent"); + } + return outcome; +} + +BMK_benchOutcome_t BMK_benchMem(const void* srcBuffer, size_t srcSize, + const size_t* fileSizes, unsigned nbFiles, + int cLevel, const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + int displayLevel, const char* displayName) { + + BMK_advancedParams_t const adv = BMK_initAdvancedParams(); + return BMK_benchMemAdvanced(srcBuffer, srcSize, + NULL, 0, + fileSizes, nbFiles, + cLevel, comprParams, + dictBuffer, dictBufferSize, + displayLevel, displayName, &adv); +} + +static BMK_benchOutcome_t BMK_benchCLevel(const void* srcBuffer, size_t benchedSize, + const size_t* fileSizes, unsigned nbFiles, + int cLevel, const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + int 
displayLevel, const char* displayName, + BMK_advancedParams_t const * const adv) +{ + const char* pch = strrchr(displayName, '\\'); /* Windows */ + if (!pch) pch = strrchr(displayName, '/'); /* Linux */ + if (pch) displayName = pch+1; + + if (adv->realTime) { + DISPLAYLEVEL(2, "Note : switching to real-time priority \n"); + SET_REALTIME_PRIORITY; + } + + if (displayLevel == 1 && !adv->additionalParam) /* --quiet mode */ + OUTPUT("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", + ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING, + (unsigned)benchedSize, adv->nbSeconds, (unsigned)(adv->blockSize>>10)); + + return BMK_benchMemAdvanced(srcBuffer, benchedSize, + NULL, 0, + fileSizes, nbFiles, + cLevel, comprParams, + dictBuffer, dictBufferSize, + displayLevel, displayName, adv); +} + +BMK_benchOutcome_t BMK_syntheticTest(int cLevel, double compressibility, + const ZSTD_compressionParameters* compressionParams, + int displayLevel, const BMK_advancedParams_t* adv) +{ + char name[20] = {0}; + size_t const benchedSize = 10000000; + void* srcBuffer; + BMK_benchOutcome_t res; + + if (cLevel > ZSTD_maxCLevel()) { + RETURN_ERROR(15, BMK_benchOutcome_t, "Invalid Compression Level"); + } + + /* Memory allocation */ + srcBuffer = malloc(benchedSize); + if (!srcBuffer) RETURN_ERROR(21, BMK_benchOutcome_t, "not enough memory"); + + /* Fill input buffer */ + RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); + + /* Bench */ + snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100)); + res = BMK_benchCLevel(srcBuffer, benchedSize, + &benchedSize /* ? */, 1 /* ? 
*/, + cLevel, compressionParams, + NULL, 0, /* dictionary */ + displayLevel, name, adv); + + /* clean up */ + free(srcBuffer); + + return res; +} + + + +static size_t BMK_findMaxMem(U64 requiredMem) +{ + size_t const step = 64 MB; + BYTE* testmem = NULL; + + requiredMem = (((requiredMem >> 26) + 1) << 26); + requiredMem += step; + if (requiredMem > maxMemory) requiredMem = maxMemory; + + do { + testmem = (BYTE*)malloc((size_t)requiredMem); + requiredMem -= step; + } while (!testmem && requiredMem > 0); + + free(testmem); + return (size_t)(requiredMem); +} + +/*! BMK_loadFiles() : + * Loads `buffer` with content of files listed within `fileNamesTable`. + * At most, fills `buffer` entirely. */ +static int BMK_loadFiles(void* buffer, size_t bufferSize, + size_t* fileSizes, + const char* const * fileNamesTable, unsigned nbFiles, + int displayLevel) +{ + size_t pos = 0, totalSize = 0; + unsigned n; + for (n=0; n<nbFiles; n++) { + U64 fileSize = UTIL_getFileSize(fileNamesTable[n]); /* last file may be shortened */ + if (UTIL_isDirectory(fileNamesTable[n])) { + DISPLAYLEVEL(2, "Ignoring %s directory... \n", fileNamesTable[n]); + fileSizes[n] = 0; + continue; + } + if (fileSize == UTIL_FILESIZE_UNKNOWN) { + DISPLAYLEVEL(2, "Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]); + fileSizes[n] = 0; + continue; + } + { FILE* const f = fopen(fileNamesTable[n], "rb"); + if (f==NULL) RETURN_ERROR_INT(10, "impossible to open file %s", fileNamesTable[n]); + OUTPUTLEVEL(2, "Loading %s... 
\r", fileNamesTable[n]); + if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n; /* buffer too small - stop after this file */ + { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f); + if (readSize != (size_t)fileSize) RETURN_ERROR_INT(11, "could not read %s", fileNamesTable[n]); + pos += readSize; + } + fileSizes[n] = (size_t)fileSize; + totalSize += (size_t)fileSize; + fclose(f); + } } + + if (totalSize == 0) RETURN_ERROR_INT(12, "no data to bench"); + return 0; +} + +BMK_benchOutcome_t BMK_benchFilesAdvanced( + const char* const * fileNamesTable, unsigned nbFiles, + const char* dictFileName, int cLevel, + const ZSTD_compressionParameters* compressionParams, + int displayLevel, const BMK_advancedParams_t* adv) +{ + void* srcBuffer = NULL; + size_t benchedSize; + void* dictBuffer = NULL; + size_t dictBufferSize = 0; + size_t* fileSizes = NULL; + BMK_benchOutcome_t res; + U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles); + + if (!nbFiles) { + RETURN_ERROR(14, BMK_benchOutcome_t, "No Files to Benchmark"); + } + + if (cLevel > ZSTD_maxCLevel()) { + RETURN_ERROR(15, BMK_benchOutcome_t, "Invalid Compression Level"); + } + + if (totalSizeToLoad == UTIL_FILESIZE_UNKNOWN) { + RETURN_ERROR(9, BMK_benchOutcome_t, "Error loading files"); + } + + fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t)); + if (!fileSizes) RETURN_ERROR(12, BMK_benchOutcome_t, "not enough memory for fileSizes"); + + /* Load dictionary */ + if (dictFileName != NULL) { + U64 const dictFileSize = UTIL_getFileSize(dictFileName); + if (dictFileSize == UTIL_FILESIZE_UNKNOWN) { + DISPLAYLEVEL(1, "error loading %s : %s \n", dictFileName, strerror(errno)); + free(fileSizes); + RETURN_ERROR(9, BMK_benchOutcome_t, "benchmark aborted"); + } + if (dictFileSize > 64 MB) { + free(fileSizes); + RETURN_ERROR(10, BMK_benchOutcome_t, "dictionary file %s too large", dictFileName); + } + dictBufferSize = (size_t)dictFileSize; + dictBuffer = 
malloc(dictBufferSize); + if (dictBuffer==NULL) { + free(fileSizes); + RETURN_ERROR(11, BMK_benchOutcome_t, "not enough memory for dictionary (%u bytes)", + (unsigned)dictBufferSize); + } + + { int const errorCode = BMK_loadFiles(dictBuffer, dictBufferSize, + fileSizes, &dictFileName /*?*/, + 1 /*?*/, displayLevel); + if (errorCode) { + res = BMK_benchOutcome_error(); + goto _cleanUp; + } } + } + + /* Memory allocation & restrictions */ + benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3; + if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad; + if (benchedSize < totalSizeToLoad) + DISPLAY("Not enough memory; testing %u MB only...\n", (unsigned)(benchedSize >> 20)); + + srcBuffer = benchedSize ? malloc(benchedSize) : NULL; + if (!srcBuffer) { + free(dictBuffer); + free(fileSizes); + RETURN_ERROR(12, BMK_benchOutcome_t, "not enough memory"); + } + + /* Load input buffer */ + { int const errorCode = BMK_loadFiles(srcBuffer, benchedSize, + fileSizes, fileNamesTable, nbFiles, + displayLevel); + if (errorCode) { + res = BMK_benchOutcome_error(); + goto _cleanUp; + } } + + /* Bench */ + { char mfName[20] = {0}; + snprintf (mfName, sizeof(mfName), " %u files", nbFiles); + { const char* const displayName = (nbFiles > 1) ? 
mfName : fileNamesTable[0]; + res = BMK_benchCLevel(srcBuffer, benchedSize, + fileSizes, nbFiles, + cLevel, compressionParams, + dictBuffer, dictBufferSize, + displayLevel, displayName, + adv); + } } + +_cleanUp: + free(srcBuffer); + free(dictBuffer); + free(fileSizes); + return res; +} + + +BMK_benchOutcome_t BMK_benchFiles( + const char* const * fileNamesTable, unsigned nbFiles, + const char* dictFileName, + int cLevel, const ZSTD_compressionParameters* compressionParams, + int displayLevel) +{ + BMK_advancedParams_t const adv = BMK_initAdvancedParams(); + return BMK_benchFilesAdvanced(fileNamesTable, nbFiles, dictFileName, cLevel, compressionParams, displayLevel, &adv); +} diff --git a/contrib/libs/zstd/programs/benchzstd.h b/contrib/libs/zstd/programs/benchzstd.h new file mode 100644 index 0000000000..11ac85da7f --- /dev/null +++ b/contrib/libs/zstd/programs/benchzstd.h @@ -0,0 +1,213 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /* benchzstd : + * benchmark Zstandard compression / decompression + * over a set of files or buffers + * and display progress result and final summary + */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef BENCH_ZSTD_H_3242387 +#define BENCH_ZSTD_H_3242387 + +/* === Dependencies === */ +#include <stddef.h> /* size_t */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */ +#include "../lib/zstd.h" /* ZSTD_compressionParameters */ + + +/* === Constants === */ + +#define MB_UNIT 1000000 + + +/* === Benchmark functions === */ + +/* Creates a variant `typeName`, able to express "error or valid result". 
+ * Functions with return type `typeName` + * must first check if result is valid, using BMK_isSuccessful_*(), + * and only then can extract `baseType`. + */ +#define VARIANT_ERROR_RESULT(baseType, variantName) \ + \ +typedef struct { \ + baseType internal_never_use_directly; \ + int tag; \ +} variantName + + +typedef struct { + size_t cSize; + unsigned long long cSpeed; /* bytes / sec */ + unsigned long long dSpeed; + size_t cMem; /* memory usage during compression */ +} BMK_benchResult_t; + +VARIANT_ERROR_RESULT(BMK_benchResult_t, BMK_benchOutcome_t); + +/* check first if the return structure represents an error or a valid result */ +int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome); + +/* extract result from variant type. + * note : this function will abort() program execution if result is not valid + * check result validity first, by using BMK_isSuccessful_benchOutcome() + */ +BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome); + + +/*! BMK_benchFiles() -- called by zstdcli */ +/* Loads files from fileNamesTable into memory, + * and an optional dictionary from dictFileName (can be NULL), + * then uses benchMem(). + * fileNamesTable - name of files to benchmark. + * nbFiles - number of files (size of fileNamesTable), must be > 0. + * dictFileName - name of dictionary file to load. + * cLevel - compression level to benchmark, errors if invalid. + * compressionParams - advanced compression Parameters. + * displayLevel - what gets printed: + * 0 : no display; + * 1 : errors; + * 2 : + result + interaction + warnings; + * 3 : + information; + * 4 : + debug + * @return: + * a variant, which expresses either an error, or a valid result. + * Use BMK_isSuccessful_benchOutcome() to check if function was successful. 
+ * If yes, extract the valid result with BMK_extract_benchResult(), + * it will contain : + * .cSpeed: compression speed in bytes per second, + * .dSpeed: decompression speed in bytes per second, + * .cSize : compressed size, in bytes + * .cMem : memory budget required for the compression context + */ +BMK_benchOutcome_t BMK_benchFiles( + const char* const * fileNamesTable, unsigned nbFiles, + const char* dictFileName, + int cLevel, const ZSTD_compressionParameters* compressionParams, + int displayLevel); + + +typedef enum { + BMK_both = 0, + BMK_decodeOnly = 1, + BMK_compressOnly = 2 +} BMK_mode_t; + +typedef struct { + BMK_mode_t mode; /* 0: all, 1: compress only 2: decode only */ + unsigned nbSeconds; /* default timing is in nbSeconds */ + size_t blockSize; /* Maximum size of each block*/ + int nbWorkers; /* multithreading */ + unsigned realTime; /* real time priority */ + int additionalParam; /* used by python speed benchmark */ + int ldmFlag; /* enables long distance matching */ + int ldmMinMatch; /* below: parameters for long distance matching, see zstd.1.md */ + int ldmHashLog; + int ldmBucketSizeLog; + int ldmHashRateLog; + ZSTD_paramSwitch_e literalCompressionMode; + int useRowMatchFinder; /* use row-based matchfinder if possible */ +} BMK_advancedParams_t; + +/* returns default parameters used by nonAdvanced functions */ +BMK_advancedParams_t BMK_initAdvancedParams(void); + +/*! BMK_benchFilesAdvanced(): + * Same as BMK_benchFiles(), + * with more controls, provided through advancedParams_t structure */ +BMK_benchOutcome_t BMK_benchFilesAdvanced( + const char* const * fileNamesTable, unsigned nbFiles, + const char* dictFileName, + int cLevel, const ZSTD_compressionParameters* compressionParams, + int displayLevel, const BMK_advancedParams_t* adv); + +/*! 
BMK_syntheticTest() -- called from zstdcli */ +/* Generates a sample with datagen, using compressibility argument */ +/* cLevel - compression level to benchmark, errors if invalid + * compressibility - determines compressibility of sample + * compressionParams - basic compression Parameters + * displayLevel - see benchFiles + * adv - see advanced_Params_t + * @return: + * a variant, which expresses either an error, or a valid result. + * Use BMK_isSuccessful_benchOutcome() to check if function was successful. + * If yes, extract the valid result with BMK_extract_benchResult(), + * it will contain : + * .cSpeed: compression speed in bytes per second, + * .dSpeed: decompression speed in bytes per second, + * .cSize : compressed size, in bytes + * .cMem : memory budget required for the compression context + */ +BMK_benchOutcome_t BMK_syntheticTest( + int cLevel, double compressibility, + const ZSTD_compressionParameters* compressionParams, + int displayLevel, const BMK_advancedParams_t* adv); + + + +/* === Benchmark Zstandard in a memory-to-memory scenario === */ + +/** BMK_benchMem() -- core benchmarking function, called in paramgrill + * applies ZSTD_compress_generic() and ZSTD_decompress_generic() on data in srcBuffer + * with specific compression parameters provided by other arguments using benchFunction + * (cLevel, comprParams + adv in advanced Mode) */ +/* srcBuffer - data source, expected to be valid compressed data if in Decode Only Mode + * srcSize - size of data in srcBuffer + * fileSizes - srcBuffer is considered cut into 1+ segments, to compress separately. + * note : sum(fileSizes) must be == srcSize. 
(<== ensure it's properly checked) + * nbFiles - nb of segments + * cLevel - compression level + * comprParams - basic compression parameters + * dictBuffer - a dictionary if used, null otherwise + * dictBufferSize - size of dictBuffer, 0 otherwise + * displayLevel - see BMK_benchFiles + * displayName - name used by display + * @return: + * a variant, which expresses either an error, or a valid result. + * Use BMK_isSuccessful_benchOutcome() to check if function was successful. + * If yes, extract the valid result with BMK_extract_benchResult(), + * it will contain : + * .cSpeed: compression speed in bytes per second, + * .dSpeed: decompression speed in bytes per second, + * .cSize : compressed size, in bytes + * .cMem : memory budget required for the compression context + */ +BMK_benchOutcome_t BMK_benchMem(const void* srcBuffer, size_t srcSize, + const size_t* fileSizes, unsigned nbFiles, + int cLevel, const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + int displayLevel, const char* displayName); + + +/* BMK_benchMemAdvanced() : same as BMK_benchMem() + * with following additional options : + * dstBuffer - destination buffer to write compressed output in, NULL if none provided. 
+ * dstCapacity - capacity of destination buffer, give 0 if dstBuffer = NULL + * adv = see advancedParams_t + */ +BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, + void* dstBuffer, size_t dstCapacity, + const size_t* fileSizes, unsigned nbFiles, + int cLevel, const ZSTD_compressionParameters* comprParams, + const void* dictBuffer, size_t dictBufferSize, + int displayLevel, const char* displayName, + const BMK_advancedParams_t* adv); + + + +#endif /* BENCH_ZSTD_H_3242387 */ + +#if defined (__cplusplus) +} +#endif diff --git a/contrib/libs/zstd/programs/datagen.c b/contrib/libs/zstd/programs/datagen.c new file mode 100644 index 0000000000..3b4f9e5c7b --- /dev/null +++ b/contrib/libs/zstd/programs/datagen.c @@ -0,0 +1,186 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + + +/*-************************************ +* Dependencies +**************************************/ +#include "datagen.h" +#include "platform.h" /* SET_BINARY_MODE */ +#include <stdlib.h> /* malloc, free */ +#include <stdio.h> /* FILE, fwrite, fprintf */ +#include <string.h> /* memcpy */ +#include "../lib/common/mem.h" /* U32 */ + + +/*-************************************ +* Macros +**************************************/ +#define KB *(1 <<10) +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +#define RDG_DEBUG 0 +#define TRACE(...) 
if (RDG_DEBUG) fprintf(stderr, __VA_ARGS__ ) + + +/*-************************************ +* Local constants +**************************************/ +#define LTLOG 13 +#define LTSIZE (1<<LTLOG) +#define LTMASK (LTSIZE-1) + + +/*-******************************************************* +* Local Functions +*********************************************************/ +#define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r))) +static U32 RDG_rand(U32* src) +{ + static const U32 prime1 = 2654435761U; + static const U32 prime2 = 2246822519U; + U32 rand32 = *src; + rand32 *= prime1; + rand32 ^= prime2; + rand32 = RDG_rotl32(rand32, 13); + *src = rand32; + return rand32 >> 5; +} + +typedef U32 fixedPoint_24_8; + +static void RDG_fillLiteralDistrib(BYTE* ldt, fixedPoint_24_8 ld) +{ + BYTE const firstChar = (ld<=0.0) ? 0 : '('; + BYTE const lastChar = (ld<=0.0) ? 255 : '}'; + BYTE character = (ld<=0.0) ? 0 : '0'; + U32 u; + + if (ld<=0) ld = 0; + for (u=0; u<LTSIZE; ) { + U32 const weight = (((LTSIZE - u) * ld) >> 8) + 1; + U32 const end = MIN ( u + weight , LTSIZE); + while (u < end) ldt[u++] = character; + character++; + if (character > lastChar) character = firstChar; + } +} + + +static BYTE RDG_genChar(U32* seed, const BYTE* ldt) +{ + U32 const id = RDG_rand(seed) & LTMASK; + return ldt[id]; /* memory-sanitizer fails here, stating "uninitialized value" when table initialized with P==0.0. 
Checked : table is fully initialized */ +} + + +static U32 RDG_rand15Bits (U32* seedPtr) +{ + return RDG_rand(seedPtr) & 0x7FFF; +} + +static U32 RDG_randLength(U32* seedPtr) +{ + if (RDG_rand(seedPtr) & 7) return (RDG_rand(seedPtr) & 0xF); /* small length */ + return (RDG_rand(seedPtr) & 0x1FF) + 0xF; +} + +static void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, + double matchProba, const BYTE* ldt, U32* seedPtr) +{ + BYTE* const buffPtr = (BYTE*)buffer; + U32 const matchProba32 = (U32)(32768 * matchProba); + size_t pos = prefixSize; + U32 prevOffset = 1; + + /* special case : sparse content */ + while (matchProba >= 1.0) { + size_t size0 = RDG_rand(seedPtr) & 3; + size0 = (size_t)1 << (16 + size0 * 2); + size0 += RDG_rand(seedPtr) & (size0-1); /* because size0 is power of 2*/ + if (buffSize < pos + size0) { + memset(buffPtr+pos, 0, buffSize-pos); + return; + } + memset(buffPtr+pos, 0, size0); + pos += size0; + buffPtr[pos-1] = RDG_genChar(seedPtr, ldt); + continue; + } + + /* init */ + if (pos==0) buffPtr[0] = RDG_genChar(seedPtr, ldt), pos=1; + + /* Generate compressible data */ + while (pos < buffSize) { + /* Select : Literal (char) or Match (within 32K) */ + if (RDG_rand15Bits(seedPtr) < matchProba32) { + /* Copy (within 32K) */ + U32 const length = RDG_randLength(seedPtr) + 4; + U32 const d = (U32) MIN(pos + length , buffSize); + U32 const repeatOffset = (RDG_rand(seedPtr) & 15) == 2; + U32 const randOffset = RDG_rand15Bits(seedPtr) + 1; + U32 const offset = repeatOffset ? 
prevOffset : (U32) MIN(randOffset , pos); + size_t match = pos - offset; + while (pos < d) { buffPtr[pos++] = buffPtr[match++]; /* correctly manages overlaps */ } + prevOffset = offset; + } else { + /* Literal (noise) */ + U32 const length = RDG_randLength(seedPtr); + U32 const d = (U32) MIN(pos + length, buffSize); + while (pos < d) { buffPtr[pos++] = RDG_genChar(seedPtr, ldt); } + } } +} + + +void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed) +{ + U32 seed32 = seed; + BYTE ldt[LTSIZE]; + memset(ldt, '0', sizeof(ldt)); /* yes, character '0', this is intentional */ + if (litProba<=0.0) litProba = matchProba / 4.5; + RDG_fillLiteralDistrib(ldt, (fixedPoint_24_8)(litProba * 256 + 0.001)); + RDG_genBlock(buffer, size, 0, matchProba, ldt, &seed32); +} + + +void RDG_genStdout(unsigned long long size, double matchProba, double litProba, unsigned seed) +{ + U32 seed32 = seed; + size_t const stdBlockSize = 128 KB; + size_t const stdDictSize = 32 KB; + BYTE* const buff = (BYTE*)malloc(stdDictSize + stdBlockSize); + U64 total = 0; + BYTE ldt[LTSIZE]; /* literals distribution table */ + + /* init */ + if (buff==NULL) { perror("datagen"); exit(1); } + if (litProba<=0.0) litProba = matchProba / 4.5; + memset(ldt, '0', sizeof(ldt)); /* yes, character '0', this is intentional */ + RDG_fillLiteralDistrib(ldt, (fixedPoint_24_8)(litProba * 256 + 0.001)); + SET_BINARY_MODE(stdout); + + /* Generate initial dict */ + RDG_genBlock(buff, stdDictSize, 0, matchProba, ldt, &seed32); + + /* Generate compressible data */ + while (total < size) { + size_t const genBlockSize = (size_t) (MIN (stdBlockSize, size-total)); + RDG_genBlock(buff, stdDictSize+stdBlockSize, stdDictSize, matchProba, ldt, &seed32); + total += genBlockSize; + { size_t const unused = fwrite(buff, 1, genBlockSize, stdout); (void)unused; } + /* update dict */ + memcpy(buff, buff + stdBlockSize, stdDictSize); + } + + /* cleanup */ + free(buff); +} diff --git 
a/contrib/libs/zstd/programs/datagen.h b/contrib/libs/zstd/programs/datagen.h new file mode 100644 index 0000000000..b76ae2a222 --- /dev/null +++ b/contrib/libs/zstd/programs/datagen.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#ifndef DATAGEN_H +#define DATAGEN_H + +#include <stddef.h> /* size_t */ + +void RDG_genStdout(unsigned long long size, double matchProba, double litProba, unsigned seed); +void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed); +/*!RDG_genBuffer + Generate 'size' bytes of compressible data into 'buffer'. + Compressibility can be controlled using 'matchProba', which is floating point value between 0 and 1. + 'LitProba' is optional, it affect variability of individual bytes. If litProba==0.0, default value will be used. + Generated data pattern can be modified using different 'seed'. + For a triplet (matchProba, litProba, seed), the function always generate the same content. + + RDG_genStdout + Same as RDG_genBuffer, but generates data into stdout +*/ + +#endif diff --git a/contrib/libs/zstd/programs/dibio.c b/contrib/libs/zstd/programs/dibio.c new file mode 100644 index 0000000000..d19f954486 --- /dev/null +++ b/contrib/libs/zstd/programs/dibio.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + + +/* ************************************** +* Compiler Warnings +****************************************/ +#ifdef _MSC_VER +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/*-************************************* +* Includes +***************************************/ +#include "platform.h" /* Large Files support */ +#include "util.h" /* UTIL_getFileSize, UTIL_getTotalFileSize */ +#include <stdlib.h> /* malloc, free */ +#include <string.h> /* memset */ +#include <stdio.h> /* fprintf, fopen, ftello64 */ +#include <errno.h> /* errno */ +#include <assert.h> + +#include "timefn.h" /* UTIL_time_t, UTIL_clockSpanMicro, UTIL_getTime */ +#include "../lib/common/mem.h" /* read */ +#include "dibio.h" + + +/*-************************************* +* Constants +***************************************/ +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define SAMPLESIZE_MAX (128 KB) +#define MEMMULT 11 /* rough estimation : memory cost to analyze 1 byte of sample */ +#define COVER_MEMMULT 9 /* rough estimation : memory cost to analyze 1 byte of sample */ +#define FASTCOVER_MEMMULT 1 /* rough estimation : memory cost to analyze 1 byte of sample */ +static const size_t g_maxMemory = (sizeof(size_t) == 4) ? (2 GB - 64 MB) : ((size_t)(512 MB) << sizeof(size_t)); + +#define NOISELENGTH 32 +#define MAX_SAMPLES_SIZE (2 GB) /* training dataset limited to 2GB */ + + +/*-************************************* +* Console display +***************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } + +static const U64 g_refreshRate = SEC_TO_MICRO / 6; +static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; + +#define DISPLAYUPDATE(l, ...) 
{ if (displayLevel>=l) { \ + if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \ + { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \ + if (displayLevel>=4) fflush(stderr); } } } + +/*-************************************* +* Exceptions +***************************************/ +#ifndef DEBUG +# define DEBUG 0 +#endif +#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); +#define EXM_THROW(error, ...) \ +{ \ + DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ + DISPLAY("Error %i : ", error); \ + DISPLAY(__VA_ARGS__); \ + DISPLAY("\n"); \ + exit(error); \ +} + + +/* ******************************************************** +* Helper functions +**********************************************************/ +#undef MIN +#define MIN(a,b) ((a) < (b) ? (a) : (b)) + +/** + Returns the size of a file. + If error returns -1. +*/ +static S64 DiB_getFileSize (const char * fileName) +{ + U64 const fileSize = UTIL_getFileSize(fileName); + return (fileSize == UTIL_FILESIZE_UNKNOWN) ? -1 : (S64)fileSize; +} + +/* ******************************************************** +* File related operations +**********************************************************/ +/** DiB_loadFiles() : + * load samples from files listed in fileNamesTable into buffer. + * works even if buffer is too small to load all samples. + * Also provides the size of each sample into sampleSizes table + * which must be sized correctly, using DiB_fileStats(). + * @return : nb of samples effectively loaded into `buffer` + * *bufferSizePtr is modified, it provides the amount data loaded within buffer. + * sampleSizes is filled with the size of each sample. 
+ */ +static int DiB_loadFiles( + void* buffer, size_t* bufferSizePtr, + size_t* sampleSizes, int sstSize, + const char** fileNamesTable, int nbFiles, + size_t targetChunkSize, int displayLevel ) +{ + char* const buff = (char*)buffer; + size_t totalDataLoaded = 0; + int nbSamplesLoaded = 0; + int fileIndex = 0; + FILE * f = NULL; + + assert(targetChunkSize <= SAMPLESIZE_MAX); + + while ( nbSamplesLoaded < sstSize && fileIndex < nbFiles ) { + size_t fileDataLoaded; + S64 const fileSize = DiB_getFileSize(fileNamesTable[fileIndex]); + if (fileSize <= 0) /* skip if zero-size or file error */ + continue; + + f = fopen( fileNamesTable[fileIndex], "rb"); + if (f == NULL) + EXM_THROW(10, "zstd: dictBuilder: %s %s ", fileNamesTable[fileIndex], strerror(errno)); + DISPLAYUPDATE(2, "Loading %s... \r", fileNamesTable[fileIndex]); + + /* Load the first chunk of data from the file */ + fileDataLoaded = targetChunkSize > 0 ? + (size_t)MIN(fileSize, (S64)targetChunkSize) : + (size_t)MIN(fileSize, SAMPLESIZE_MAX ); + if (totalDataLoaded + fileDataLoaded > *bufferSizePtr) + break; + if (fread( buff+totalDataLoaded, 1, fileDataLoaded, f ) != fileDataLoaded) + EXM_THROW(11, "Pb reading %s", fileNamesTable[fileIndex]); + sampleSizes[nbSamplesLoaded++] = fileDataLoaded; + totalDataLoaded += fileDataLoaded; + + /* If file-chunking is enabled, load the rest of the file as more samples */ + if (targetChunkSize > 0) { + while( (S64)fileDataLoaded < fileSize && nbSamplesLoaded < sstSize ) { + size_t const chunkSize = MIN((size_t)(fileSize-fileDataLoaded), targetChunkSize); + if (totalDataLoaded + chunkSize > *bufferSizePtr) /* buffer is full */ + break; + + if (fread( buff+totalDataLoaded, 1, chunkSize, f ) != chunkSize) + EXM_THROW(11, "Pb reading %s", fileNamesTable[fileIndex]); + sampleSizes[nbSamplesLoaded++] = chunkSize; + totalDataLoaded += chunkSize; + fileDataLoaded += chunkSize; + } + } + fileIndex += 1; + fclose(f); f = NULL; + } + if (f != NULL) + fclose(f); + + DISPLAYLEVEL(2, 
"\r%79s\r", ""); + DISPLAYLEVEL(4, "Loaded %d KB total training data, %d nb samples \n", + (int)(totalDataLoaded / (1 KB)), nbSamplesLoaded ); + *bufferSizePtr = totalDataLoaded; + return nbSamplesLoaded; +} + +#define DiB_rotl32(x,r) ((x << r) | (x >> (32 - r))) +static U32 DiB_rand(U32* src) +{ + static const U32 prime1 = 2654435761U; + static const U32 prime2 = 2246822519U; + U32 rand32 = *src; + rand32 *= prime1; + rand32 ^= prime2; + rand32 = DiB_rotl32(rand32, 13); + *src = rand32; + return rand32 >> 5; +} + +/* DiB_shuffle() : + * shuffle a table of file names in a semi-random way + * It improves dictionary quality by reducing "locality" impact, so if sample set is very large, + * it will load random elements from it, instead of just the first ones. */ +static void DiB_shuffle(const char** fileNamesTable, unsigned nbFiles) { + U32 seed = 0xFD2FB528; + unsigned i; + assert(nbFiles >= 1); + for (i = nbFiles - 1; i > 0; --i) { + unsigned const j = DiB_rand(&seed) % (i + 1); + const char* const tmp = fileNamesTable[j]; + fileNamesTable[j] = fileNamesTable[i]; + fileNamesTable[i] = tmp; + } +} + + +/*-******************************************************** +* Dictionary training functions +**********************************************************/ +static size_t DiB_findMaxMem(unsigned long long requiredMem) +{ + size_t const step = 8 MB; + void* testmem = NULL; + + requiredMem = (((requiredMem >> 23) + 1) << 23); + requiredMem += step; + if (requiredMem > g_maxMemory) requiredMem = g_maxMemory; + + while (!testmem) { + testmem = malloc((size_t)requiredMem); + requiredMem -= step; + } + + free(testmem); + return (size_t)requiredMem; +} + + +static void DiB_fillNoise(void* buffer, size_t length) +{ + unsigned const prime1 = 2654435761U; + unsigned const prime2 = 2246822519U; + unsigned acc = prime1; + size_t p=0; + + for (p=0; p<length; p++) { + acc *= prime2; + ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21); + } +} + + +static void DiB_saveDict(const 
char* dictFileName, + const void* buff, size_t buffSize) +{ + FILE* const f = fopen(dictFileName, "wb"); + if (f==NULL) EXM_THROW(3, "cannot open %s ", dictFileName); + + { size_t const n = fwrite(buff, 1, buffSize, f); + if (n!=buffSize) EXM_THROW(4, "%s : write error", dictFileName) } + + { size_t const n = (size_t)fclose(f); + if (n!=0) EXM_THROW(5, "%s : flush error", dictFileName) } +} + +typedef struct { + S64 totalSizeToLoad; + int nbSamples; + int oneSampleTooLarge; +} fileStats; + +/*! DiB_fileStats() : + * Given a list of files, and a chunkSize (0 == no chunk, whole files) + * provides the amount of data to be loaded and the resulting nb of samples. + * This is useful primarily for allocation purpose => sample buffer, and sample sizes table. + */ +static fileStats DiB_fileStats(const char** fileNamesTable, int nbFiles, size_t chunkSize, int displayLevel) +{ + fileStats fs; + int n; + memset(&fs, 0, sizeof(fs)); + + // We assume that if chunking is requested, the chunk size is < SAMPLESIZE_MAX + assert( chunkSize <= SAMPLESIZE_MAX ); + + for (n=0; n<nbFiles; n++) { + S64 const fileSize = DiB_getFileSize(fileNamesTable[n]); + // TODO: is there a minimum sample size? What if the file is 1-byte? + if (fileSize == 0) { + DISPLAYLEVEL(3, "Sample file '%s' has zero size, skipping...\n", fileNamesTable[n]); + continue; + } + + /* the case where we are breaking up files in sample chunks */ + if (chunkSize > 0) + { + // TODO: is there a minimum sample size? Can we have a 1-byte sample? 
+ fs.nbSamples += (int)((fileSize + chunkSize-1) / chunkSize); + fs.totalSizeToLoad += fileSize; + } + else { + /* the case where one file is one sample */ + if (fileSize > SAMPLESIZE_MAX) { + /* flag excessively large sample files */ + fs.oneSampleTooLarge |= (fileSize > 2*SAMPLESIZE_MAX); + + /* Limit to the first SAMPLESIZE_MAX (128kB) of the file */ + DISPLAYLEVEL(3, "Sample file '%s' is too large, limiting to %d KB", + fileNamesTable[n], SAMPLESIZE_MAX / (1 KB)); + } + fs.nbSamples += 1; + fs.totalSizeToLoad += MIN(fileSize, SAMPLESIZE_MAX); + } + } + DISPLAYLEVEL(4, "Found training data %d files, %d KB, %d samples\n", nbFiles, (int)(fs.totalSizeToLoad / (1 KB)), fs.nbSamples); + return fs; +} + +int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize, + const char** fileNamesTable, int nbFiles, size_t chunkSize, + ZDICT_legacy_params_t* params, ZDICT_cover_params_t* coverParams, + ZDICT_fastCover_params_t* fastCoverParams, int optimize, unsigned memLimit) +{ + fileStats fs; + size_t* sampleSizes; /* vector of sample sizes. Each sample can be up to SAMPLESIZE_MAX */ + int nbSamplesLoaded; /* nb of samples effectively loaded in srcBuffer */ + size_t loadedSize; /* total data loaded in srcBuffer for all samples */ + void* srcBuffer /* contiguous buffer with training data/samples */; + void* const dictBuffer = malloc(maxDictSize); + int result = 0; + + int const displayLevel = params ? params->zParams.notificationLevel : + coverParams ? coverParams->zParams.notificationLevel : + fastCoverParams ? fastCoverParams->zParams.notificationLevel : 0; + + /* Shuffle input files before we start assessing how much sample datA to load. + The purpose of the shuffle is to pick random samples when the sample + set is larger than what we can load in memory. 
*/ + DISPLAYLEVEL(3, "Shuffling input files\n"); + DiB_shuffle(fileNamesTable, nbFiles); + + /* Figure out how much sample data to load with how many samples */ + fs = DiB_fileStats(fileNamesTable, nbFiles, chunkSize, displayLevel); + + { + int const memMult = params ? MEMMULT : + coverParams ? COVER_MEMMULT: + FASTCOVER_MEMMULT; + size_t const maxMem = DiB_findMaxMem(fs.totalSizeToLoad * memMult) / memMult; + /* Limit the size of the training data to the free memory */ + /* Limit the size of the training data to 2GB */ + /* TODO: there is opportunity to stop DiB_fileStats() early when the data limit is reached */ + loadedSize = (size_t)MIN( MIN((S64)maxMem, fs.totalSizeToLoad), MAX_SAMPLES_SIZE ); + if (memLimit != 0) { + DISPLAYLEVEL(2, "! Warning : setting manual memory limit for dictionary training data at %u MB \n", + (unsigned)(memLimit / (1 MB))); + loadedSize = (size_t)MIN(loadedSize, memLimit); + } + srcBuffer = malloc(loadedSize+NOISELENGTH); + sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t)); + } + + /* Checks */ + if ((!sampleSizes) || (!srcBuffer) || (!dictBuffer)) + EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */ + if (fs.oneSampleTooLarge) { + DISPLAYLEVEL(2, "! Warning : some sample(s) are very large \n"); + DISPLAYLEVEL(2, "! Note that dictionary is only useful for small samples. \n"); + DISPLAYLEVEL(2, "! As a consequence, only the first %u bytes of each sample are loaded \n", SAMPLESIZE_MAX); + } + if (fs.nbSamples < 5) { + DISPLAYLEVEL(2, "! Warning : nb of samples too low for proper processing ! \n"); + DISPLAYLEVEL(2, "! Please provide _one file per sample_. \n"); + DISPLAYLEVEL(2, "! Alternatively, split files into fixed-size blocks representative of samples, with -B# \n"); + EXM_THROW(14, "nb of samples too low"); /* we now clearly forbid this case */ + } + if (fs.totalSizeToLoad < (S64)maxDictSize * 8) { + DISPLAYLEVEL(2, "! 
Warning : data size of samples too small for target dictionary size \n"); + DISPLAYLEVEL(2, "! Samples should be about 100x larger than target dictionary size \n"); + } + + /* init */ + if ((S64)loadedSize < fs.totalSizeToLoad) + DISPLAYLEVEL(1, "Training samples set too large (%u MB); training on %u MB only...\n", + (unsigned)(fs.totalSizeToLoad / (1 MB)), + (unsigned)(loadedSize / (1 MB))); + + /* Load input buffer */ + nbSamplesLoaded = DiB_loadFiles( + srcBuffer, &loadedSize, sampleSizes, fs.nbSamples, fileNamesTable, + nbFiles, chunkSize, displayLevel); + + { size_t dictSize; + if (params) { + DiB_fillNoise((char*)srcBuffer + loadedSize, NOISELENGTH); /* guard band, for end of buffer condition */ + dictSize = ZDICT_trainFromBuffer_legacy(dictBuffer, maxDictSize, + srcBuffer, sampleSizes, nbSamplesLoaded, + *params); + } else if (coverParams) { + if (optimize) { + dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize, + srcBuffer, sampleSizes, nbSamplesLoaded, + coverParams); + if (!ZDICT_isError(dictSize)) { + unsigned splitPercentage = (unsigned)(coverParams->splitPoint * 100); + DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\nsplit=%u\n", coverParams->k, coverParams->d, + coverParams->steps, splitPercentage); + } + } else { + dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer, + sampleSizes, nbSamplesLoaded, *coverParams); + } + } else { + assert(fastCoverParams != NULL); + if (optimize) { + dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, maxDictSize, + srcBuffer, sampleSizes, nbSamplesLoaded, + fastCoverParams); + if (!ZDICT_isError(dictSize)) { + unsigned splitPercentage = (unsigned)(fastCoverParams->splitPoint * 100); + DISPLAYLEVEL(2, "k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\n", fastCoverParams->k, + fastCoverParams->d, fastCoverParams->f, fastCoverParams->steps, splitPercentage, + fastCoverParams->accel); + } + } else { + dictSize = ZDICT_trainFromBuffer_fastCover(dictBuffer, maxDictSize, srcBuffer, + 
sampleSizes, nbSamplesLoaded, *fastCoverParams); + } + } + if (ZDICT_isError(dictSize)) { + DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */ + result = 1; + goto _cleanup; + } + /* save dict */ + DISPLAYLEVEL(2, "Save dictionary of size %u into file %s \n", (unsigned)dictSize, dictFileName); + DiB_saveDict(dictFileName, dictBuffer, dictSize); + } + + /* clean up */ +_cleanup: + free(srcBuffer); + free(sampleSizes); + free(dictBuffer); + return result; +} diff --git a/contrib/libs/zstd/programs/dibio.h b/contrib/libs/zstd/programs/dibio.h new file mode 100644 index 0000000000..666c1e6618 --- /dev/null +++ b/contrib/libs/zstd/programs/dibio.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This library is designed for a single-threaded console application. +* It exit() and printf() into stderr when it encounters an error condition. */ + +#ifndef DIBIO_H_003 +#define DIBIO_H_003 + + +/*-************************************* +* Dependencies +***************************************/ +#define ZDICT_STATIC_LINKING_ONLY +#include "../lib/zdict.h" /* ZDICT_params_t */ + + +/*-************************************* +* Public functions +***************************************/ +/*! DiB_trainFromFiles() : + Train a dictionary from a set of files provided by `fileNamesTable`. + Resulting dictionary is written into file `dictFileName`. + `parameters` is optional and can be provided with values set to 0, meaning "default". + @return : 0 == ok. Any other : error. 
+*/ +int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize, + const char** fileNamesTable, int nbFiles, size_t chunkSize, + ZDICT_legacy_params_t* params, ZDICT_cover_params_t* coverParams, + ZDICT_fastCover_params_t* fastCoverParams, int optimize, unsigned memLimit); + +#endif diff --git a/contrib/libs/zstd/programs/fileio.c b/contrib/libs/zstd/programs/fileio.c new file mode 100644 index 0000000000..0a0dfc42cb --- /dev/null +++ b/contrib/libs/zstd/programs/fileio.c @@ -0,0 +1,3221 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* ************************************* +* Compiler Options +***************************************/ +#ifdef _MSC_VER /* Visual */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4204) /* non-constant aggregate initializer */ +#endif +#if defined(__MINGW32__) && !defined(_POSIX_SOURCE) +# define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */ +#endif + +/*-************************************* +* Includes +***************************************/ +#include "platform.h" /* Large Files support, SET_BINARY_MODE */ +#include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */ +#include <stdio.h> /* fprintf, open, fdopen, fread, _fileno, stdin, stdout */ +#include <stdlib.h> /* malloc, free */ +#include <string.h> /* strcmp, strlen */ +#include <fcntl.h> /* O_WRONLY */ +#include <assert.h> +#include <errno.h> /* errno */ +#include <limits.h> /* INT_MAX */ +#include <signal.h> +#include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */ + +#if defined (_MSC_VER) +# include <sys/stat.h> +# 
include <io.h> +#endif + +#include "../lib/common/mem.h" /* U32, U64 */ +#include "fileio.h" + +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */ +#include "../lib/zstd.h" +#include "../lib/zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */ + +#if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS) +# error #include <zlib.h> +# if !defined(z_const) +# define z_const +# endif +#endif + +#if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS) +# error #include <lzma.h> +#endif + +#define LZ4_MAGICNUMBER 0x184D2204 +#if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS) +# define LZ4F_ENABLE_OBSOLETE_ENUMS +# error #include <lz4frame.h> +# error #include <lz4.h> +#endif + + +/*-************************************* +* Constants +***************************************/ +#define ADAPT_WINDOWLOG_DEFAULT 23 /* 8 MB */ +#define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */ + +#define FNSPACE 30 + +/* Default file permissions 0666 (modulated by umask) */ +#if !defined(_WIN32) +/* These macros aren't defined on windows. */ +#define DEFAULT_FILE_PERMISSIONS (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH) +#else +#define DEFAULT_FILE_PERMISSIONS (0666) +#endif + +/*-************************************* +* Macros +***************************************/ +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) +#undef MAX +#define MAX(a,b) ((a)>(b) ? (a) : (b)) + +struct FIO_display_prefs_s { + int displayLevel; /* 0 : no display; 1: errors; 2: + result + interaction + warnings; 3: + progression; 4: + information */ + FIO_progressSetting_e progressSetting; +}; + +static FIO_display_prefs_t g_display_prefs = {2, FIO_ps_auto}; + +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYOUT(...) fprintf(stdout, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) 
{ if (g_display_prefs.displayLevel>=l) { DISPLAY(__VA_ARGS__); } } + +static const U64 g_refreshRate = SEC_TO_MICRO / 6; +static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER; + +#define READY_FOR_UPDATE() ((g_display_prefs.progressSetting != FIO_ps_never) && UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) +#define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); } +#define DISPLAYUPDATE(l, ...) { \ + if (g_display_prefs.displayLevel>=l && (g_display_prefs.progressSetting != FIO_ps_never)) { \ + if (READY_FOR_UPDATE() || (g_display_prefs.displayLevel>=4)) { \ + DELAY_NEXT_UPDATE(); \ + DISPLAY(__VA_ARGS__); \ + if (g_display_prefs.displayLevel>=4) fflush(stderr); \ + } } } + +#undef MIN /* in case it would be already defined */ +#define MIN(a,b) ((a) < (b) ? (a) : (b)) + + +#define EXM_THROW(error, ...) \ +{ \ + DISPLAYLEVEL(1, "zstd: "); \ + DISPLAYLEVEL(5, "Error defined at %s, line %i : \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "error %i : ", error); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, " \n"); \ + exit(error); \ +} + +#define CHECK_V(v, f) \ + v = f; \ + if (ZSTD_isError(v)) { \ + DISPLAYLEVEL(5, "%s \n", #f); \ + EXM_THROW(11, "%s", ZSTD_getErrorName(v)); \ + } +#define CHECK(f) { size_t err; CHECK_V(err, f); } + + +/*-************************************ +* Signal (Ctrl-C trapping) +**************************************/ +static const char* g_artefact = NULL; +static void INThandler(int sig) +{ + assert(sig==SIGINT); (void)sig; +#if !defined(_MSC_VER) + signal(sig, SIG_IGN); /* this invocation generates a buggy warning in Visual Studio */ +#endif + if (g_artefact) { + assert(UTIL_isRegularFile(g_artefact)); + remove(g_artefact); + } + DISPLAY("\n"); + exit(2); +} +static void addHandler(char const* dstFileName) +{ + if (UTIL_isRegularFile(dstFileName)) { + g_artefact = dstFileName; + signal(SIGINT, INThandler); + } else { + g_artefact = NULL; + } +} +/* Idempotent */ +static void clearHandler(void) +{ + if (g_artefact) 
signal(SIGINT, SIG_DFL); + g_artefact = NULL; +} + + +/*-********************************************************* +* Termination signal trapping (Print debug stack trace) +***********************************************************/ +#if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */ +# if (__has_feature(address_sanitizer)) +# define BACKTRACE_ENABLE 0 +# endif /* __has_feature(address_sanitizer) */ +#elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */ +# define BACKTRACE_ENABLE 0 +#endif + +#if !defined(BACKTRACE_ENABLE) +/* automatic detector : backtrace enabled by default on linux+glibc and osx */ +# if (defined(__linux__) && (defined(__GLIBC__) && !defined(__UCLIBC__))) \ + || (defined(__APPLE__) && defined(__MACH__)) +# define BACKTRACE_ENABLE 1 +# else +# define BACKTRACE_ENABLE 0 +# endif +#endif + +/* note : after this point, BACKTRACE_ENABLE is necessarily defined */ + + +#if BACKTRACE_ENABLE + +#include <execinfo.h> /* backtrace, backtrace_symbols */ + +#define MAX_STACK_FRAMES 50 + +static void ABRThandler(int sig) { + const char* name; + void* addrlist[MAX_STACK_FRAMES]; + char** symbollist; + int addrlen, i; + + switch (sig) { + case SIGABRT: name = "SIGABRT"; break; + case SIGFPE: name = "SIGFPE"; break; + case SIGILL: name = "SIGILL"; break; + case SIGINT: name = "SIGINT"; break; + case SIGSEGV: name = "SIGSEGV"; break; + default: name = "UNKNOWN"; + } + + DISPLAY("Caught %s signal, printing stack:\n", name); + /* Retrieve current stack addresses. */ + addrlen = backtrace(addrlist, MAX_STACK_FRAMES); + if (addrlen == 0) { + DISPLAY("\n"); + return; + } + /* Create readable strings to each frame. */ + symbollist = backtrace_symbols(addrlist, addrlen); + /* Print the stack trace, excluding calls handling the signal. */ + for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) { + DISPLAY("%s\n", symbollist[i]); + } + free(symbollist); + /* Reset and raise the signal so default handler runs. 
*/ + signal(sig, SIG_DFL); + raise(sig); +} +#endif + +void FIO_addAbortHandler() +{ +#if BACKTRACE_ENABLE + signal(SIGABRT, ABRThandler); + signal(SIGFPE, ABRThandler); + signal(SIGILL, ABRThandler); + signal(SIGSEGV, ABRThandler); + signal(SIGBUS, ABRThandler); +#endif +} + + +/*-************************************************************ +* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW +***************************************************************/ +#if defined(_MSC_VER) && _MSC_VER >= 1400 +# define LONG_SEEK _fseeki64 +# define LONG_TELL _ftelli64 +#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ +# define LONG_SEEK fseeko +# define LONG_TELL ftello +#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__) +# define LONG_SEEK fseeko64 +# define LONG_TELL ftello64 +#elif defined(_WIN32) && !defined(__DJGPP__) +# include <windows.h> + static int LONG_SEEK(FILE* file, __int64 offset, int origin) { + LARGE_INTEGER off; + DWORD method; + off.QuadPart = offset; + if (origin == SEEK_END) + method = FILE_END; + else if (origin == SEEK_CUR) + method = FILE_CURRENT; + else + method = FILE_BEGIN; + + if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method)) + return 0; + else + return -1; + } + static __int64 LONG_TELL(FILE* file) { + LARGE_INTEGER off, newOff; + off.QuadPart = 0; + newOff.QuadPart = 0; + SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, &newOff, FILE_CURRENT); + return newOff.QuadPart; + } +#else +# define LONG_SEEK fseek +# define LONG_TELL ftell +#endif + + +/*-************************************* +* Parameters: FIO_prefs_t +***************************************/ + +/* typedef'd to FIO_prefs_t within fileio.h */ +struct FIO_prefs_s { + + /* Algorithm preferences */ + FIO_compressionType_t compressionType; + U32 sparseFileSupport; /* 0: no sparse allowed; 1: auto (file yes, stdout no); 2: 
force sparse */ + int dictIDFlag; + int checksumFlag; + int blockSize; + int overlapLog; + U32 adaptiveMode; + U32 useRowMatchFinder; + int rsyncable; + int minAdaptLevel; + int maxAdaptLevel; + int ldmFlag; + int ldmHashLog; + int ldmMinMatch; + int ldmBucketSizeLog; + int ldmHashRateLog; + size_t streamSrcSize; + size_t targetCBlockSize; + int srcSizeHint; + int testMode; + ZSTD_paramSwitch_e literalCompressionMode; + + /* IO preferences */ + U32 removeSrcFile; + U32 overwrite; + + /* Computation resources preferences */ + unsigned memLimit; + int nbWorkers; + + int excludeCompressedFiles; + int patchFromMode; + int contentSize; + int allowBlockDevices; +}; + +/*-************************************* +* Parameters: FIO_ctx_t +***************************************/ + +/* typedef'd to FIO_ctx_t within fileio.h */ +struct FIO_ctx_s { + + /* file i/o info */ + int nbFilesTotal; + int hasStdinInput; + int hasStdoutOutput; + + /* file i/o state */ + int currFileIdx; + int nbFilesProcessed; + size_t totalBytesInput; + size_t totalBytesOutput; +}; + + +/*-************************************* +* Parameters: Initialization +***************************************/ + +#define FIO_OVERLAP_LOG_NOTSET 9999 +#define FIO_LDM_PARAM_NOTSET 9999 + + +FIO_prefs_t* FIO_createPreferences(void) +{ + FIO_prefs_t* const ret = (FIO_prefs_t*)malloc(sizeof(FIO_prefs_t)); + if (!ret) EXM_THROW(21, "Allocation error : not enough memory"); + + ret->compressionType = FIO_zstdCompression; + ret->overwrite = 0; + ret->sparseFileSupport = ZSTD_SPARSE_DEFAULT; + ret->dictIDFlag = 1; + ret->checksumFlag = 1; + ret->removeSrcFile = 0; + ret->memLimit = 0; + ret->nbWorkers = 1; + ret->blockSize = 0; + ret->overlapLog = FIO_OVERLAP_LOG_NOTSET; + ret->adaptiveMode = 0; + ret->rsyncable = 0; + ret->minAdaptLevel = -50; /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */ + ret->maxAdaptLevel = 22; /* initializing this value requires a constant, so ZSTD_maxCLevel() 
doesn't work */ + ret->ldmFlag = 0; + ret->ldmHashLog = 0; + ret->ldmMinMatch = 0; + ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET; + ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET; + ret->streamSrcSize = 0; + ret->targetCBlockSize = 0; + ret->srcSizeHint = 0; + ret->testMode = 0; + ret->literalCompressionMode = ZSTD_ps_auto; + ret->excludeCompressedFiles = 0; + ret->allowBlockDevices = 0; + return ret; +} + +FIO_ctx_t* FIO_createContext(void) +{ + FIO_ctx_t* const ret = (FIO_ctx_t*)malloc(sizeof(FIO_ctx_t)); + if (!ret) EXM_THROW(21, "Allocation error : not enough memory"); + + ret->currFileIdx = 0; + ret->hasStdinInput = 0; + ret->hasStdoutOutput = 0; + ret->nbFilesTotal = 1; + ret->nbFilesProcessed = 0; + ret->totalBytesInput = 0; + ret->totalBytesOutput = 0; + return ret; +} + +void FIO_freePreferences(FIO_prefs_t* const prefs) +{ + free(prefs); +} + +void FIO_freeContext(FIO_ctx_t* const fCtx) +{ + free(fCtx); +} + + +/*-************************************* +* Parameters: Display Options +***************************************/ + +void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; } + +void FIO_setProgressSetting(FIO_progressSetting_e setting) { g_display_prefs.progressSetting = setting; } + + +/*-************************************* +* Parameters: Setters +***************************************/ + +/* FIO_prefs_t functions */ + +void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType) { prefs->compressionType = compressionType; } + +void FIO_overwriteMode(FIO_prefs_t* const prefs) { prefs->overwrite = 1; } + +void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse) { prefs->sparseFileSupport = sparse; } + +void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag) { prefs->dictIDFlag = dictIDFlag; } + +void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag) { prefs->checksumFlag = checksumFlag; } + +void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag) { 
prefs->removeSrcFile = (flag>0); } + +void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit) { prefs->memLimit = memLimit; } + +void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) { +#ifndef ZSTD_MULTITHREAD + if (nbWorkers > 0) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n"); +#endif + prefs->nbWorkers = nbWorkers; +} + +void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; } + +void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices) { prefs->allowBlockDevices = allowBlockDevices; } + +void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) { + if (blockSize && prefs->nbWorkers==0) + DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n"); + prefs->blockSize = blockSize; +} + +void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog){ + if (overlapLog && prefs->nbWorkers==0) + DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n"); + prefs->overlapLog = overlapLog; +} + +void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt) { + if ((adapt>0) && (prefs->nbWorkers==0)) + EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n"); + prefs->adaptiveMode = adapt; +} + +void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder) { + prefs->useRowMatchFinder = useRowMatchFinder; +} + +void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) { + if ((rsyncable>0) && (prefs->nbWorkers==0)) + EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n"); + prefs->rsyncable = rsyncable; +} + +void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) { + prefs->streamSrcSize = streamSrcSize; +} + +void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) { + prefs->targetCBlockSize = targetCBlockSize; +} + +void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) { 
+ prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint); +} + +void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode) { + prefs->testMode = (testMode!=0); +} + +void FIO_setLiteralCompressionMode( + FIO_prefs_t* const prefs, + ZSTD_paramSwitch_e mode) { + prefs->literalCompressionMode = mode; +} + +void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel) +{ +#ifndef ZSTD_NOCOMPRESS + assert(minCLevel >= ZSTD_minCLevel()); +#endif + prefs->minAdaptLevel = minCLevel; +} + +void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel) +{ + prefs->maxAdaptLevel = maxCLevel; +} + +void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag) { + prefs->ldmFlag = (ldmFlag>0); +} + +void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog) { + prefs->ldmHashLog = ldmHashLog; +} + +void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch) { + prefs->ldmMinMatch = ldmMinMatch; +} + +void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog) { + prefs->ldmBucketSizeLog = ldmBucketSizeLog; +} + + +void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog) { + prefs->ldmHashRateLog = ldmHashRateLog; +} + +void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value) +{ + prefs->patchFromMode = value != 0; +} + +void FIO_setContentSize(FIO_prefs_t* const prefs, int value) +{ + prefs->contentSize = value != 0; +} + +/* FIO_ctx_t functions */ + +void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) { + fCtx->hasStdoutOutput = value; +} + +void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value) +{ + fCtx->nbFilesTotal = value; +} + +void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames) { + size_t i = 0; + for ( ; i < filenames->tableSize; ++i) { + if (!strcmp(stdinmark, filenames->fileNames[i])) { + fCtx->hasStdinInput = 1; + return; + } + } +} + +/*-************************************* +* Functions +***************************************/ +/** FIO_removeFile() 
: + * @result : Unlink `fileName`, even if it's read-only */ +static int FIO_removeFile(const char* path) +{ + stat_t statbuf; + if (!UTIL_stat(path, &statbuf)) { + DISPLAYLEVEL(2, "zstd: Failed to stat %s while trying to remove it\n", path); + return 0; + } + if (!UTIL_isRegularFileStat(&statbuf)) { + DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path); + return 0; + } +#if defined(_WIN32) || defined(WIN32) + /* windows doesn't allow remove read-only files, + * so try to make it writable first */ + if (!(statbuf.st_mode & _S_IWRITE)) { + UTIL_chmod(path, &statbuf, _S_IWRITE); + } +#endif + return remove(path); +} + +/** FIO_openSrcFile() : + * condition : `srcFileName` must be non-NULL. `prefs` may be NULL. + * @result : FILE* to `srcFileName`, or NULL if it fails */ +static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName) +{ + stat_t statbuf; + int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0; + assert(srcFileName != NULL); + if (!strcmp (srcFileName, stdinmark)) { + DISPLAYLEVEL(4,"Using stdin for input \n"); + SET_BINARY_MODE(stdin); + return stdin; + } + + if (!UTIL_stat(srcFileName, &statbuf)) { + DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n", + srcFileName, strerror(errno)); + return NULL; + } + + if (!UTIL_isRegularFileStat(&statbuf) + && !UTIL_isFIFOStat(&statbuf) + && !(allowBlockDevices && UTIL_isBlockDevStat(&statbuf)) + ) { + DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n", + srcFileName); + return NULL; + } + + { FILE* const f = fopen(srcFileName, "rb"); + if (f == NULL) + DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); + return f; + } +} + +/** FIO_openDstFile() : + * condition : `dstFileName` must be non-NULL. 
+ * @result : FILE* to `dstFileName`, or NULL if it fails */ +static FILE* +FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs, + const char* srcFileName, const char* dstFileName, + const int mode) +{ + if (prefs->testMode) return NULL; /* do not open file in test mode */ + + assert(dstFileName != NULL); + if (!strcmp (dstFileName, stdoutmark)) { + DISPLAYLEVEL(4,"Using stdout for output \n"); + SET_BINARY_MODE(stdout); + if (prefs->sparseFileSupport == 1) { + prefs->sparseFileSupport = 0; + DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n"); + } + return stdout; + } + + /* ensure dst is not the same as src */ + if (srcFileName != NULL && UTIL_isSameFile(srcFileName, dstFileName)) { + DISPLAYLEVEL(1, "zstd: Refusing to open an output file which will overwrite the input file \n"); + return NULL; + } + + if (prefs->sparseFileSupport == 1) { + prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT; + } + + if (UTIL_isRegularFile(dstFileName)) { + /* Check if destination file already exists */ +#if !defined(_WIN32) + /* this test does not work on Windows : + * `NUL` and `nul` are detected as regular files */ + if (!strcmp(dstFileName, nulmark)) { + EXM_THROW(40, "%s is unexpectedly categorized as a regular file", + dstFileName); + } +#endif + if (!prefs->overwrite) { + if (g_display_prefs.displayLevel <= 1) { + /* No interaction possible */ + DISPLAY("zstd: %s already exists; not overwritten \n", + dstFileName); + return NULL; + } + DISPLAY("zstd: %s already exists; ", dstFileName); + if (UTIL_requireUserConfirmation("overwrite (y/n) ? ", "Not overwritten \n", "yY", fCtx->hasStdinInput)) + return NULL; + } + /* need to unlink */ + FIO_removeFile(dstFileName); + } + + { +#if defined(_WIN32) + /* Windows requires opening the file as a "binary" file to avoid + * mangling. This macro doesn't exist on unix. 
*/ + const int openflags = O_WRONLY|O_CREAT|O_TRUNC|O_BINARY; + const int fd = _open(dstFileName, openflags, mode); + FILE* f = NULL; + if (fd != -1) { + f = _fdopen(fd, "wb"); + } +#else + const int openflags = O_WRONLY|O_CREAT|O_TRUNC; + const int fd = open(dstFileName, openflags, mode); + FILE* f = NULL; + if (fd != -1) { + f = fdopen(fd, "wb"); + } +#endif + if (f == NULL) { + DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno)); + } + return f; + } +} + +/*! FIO_createDictBuffer() : + * creates a buffer, pointed by `*bufferPtr`, + * loads `filename` content into it, up to DICTSIZE_MAX bytes. + * @return : loaded size + * if fileName==NULL, returns 0 and a NULL pointer + */ +static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_prefs_t* const prefs) +{ + FILE* fileHandle; + U64 fileSize; + stat_t statbuf; + + assert(bufferPtr != NULL); + *bufferPtr = NULL; + if (fileName == NULL) return 0; + + DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName); + + if (!UTIL_stat(fileName, &statbuf)) { + EXM_THROW(31, "Stat failed on dictionary file %s: %s", fileName, strerror(errno)); + } + + if (!UTIL_isRegularFileStat(&statbuf)) { + EXM_THROW(32, "Dictionary %s must be a regular file.", fileName); + } + + fileHandle = fopen(fileName, "rb"); + + if (fileHandle == NULL) { + EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno)); + } + + fileSize = UTIL_getFileSizeStat(&statbuf); + { + size_t const dictSizeMax = prefs->patchFromMode ? 
prefs->memLimit : DICTSIZE_MAX; + if (fileSize > dictSizeMax) { + EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)", + fileName, (unsigned)dictSizeMax); /* avoid extreme cases */ + } + } + *bufferPtr = malloc((size_t)fileSize); + if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno)); + { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle); + if (readSize != fileSize) { + EXM_THROW(35, "Error reading dictionary file %s : %s", + fileName, strerror(errno)); + } + } + fclose(fileHandle); + return (size_t)fileSize; +} + + + +/* FIO_checkFilenameCollisions() : + * Checks for and warns if there are any files that would have the same output path + */ +int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) { + const char **filenameTableSorted, *prevElem, *filename; + unsigned u; + + filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles); + if (!filenameTableSorted) { + DISPLAY("Unable to malloc new str array, not checking for name collisions\n"); + return 1; + } + + for (u = 0; u < nbFiles; ++u) { + filename = strrchr(filenameTable[u], PATH_SEP); + if (filename == NULL) { + filenameTableSorted[u] = filenameTable[u]; + } else { + filenameTableSorted[u] = filename+1; + } + } + + qsort((void*)filenameTableSorted, nbFiles, sizeof(char*), UTIL_compareStr); + prevElem = filenameTableSorted[0]; + for (u = 1; u < nbFiles; ++u) { + if (strcmp(prevElem, filenameTableSorted[u]) == 0) { + DISPLAY("WARNING: Two files have same filename: %s\n", prevElem); + } + prevElem = filenameTableSorted[u]; + } + + free((void*)filenameTableSorted); + return 0; +} + +static const char* +extractFilename(const char* path, char separator) +{ + const char* search = strrchr(path, separator); + if (search == NULL) return path; + return search+1; +} + +/* FIO_createFilename_fromOutDir() : + * Takes a source file name and specified output directory, and + * allocates memory for and returns a pointer to final path. 
+ * This function never returns an error (it may abort() in case of pb) + */ +static char* +FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const size_t suffixLen) +{ + const char* filenameStart; + char separator; + char* result; + +#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */ + separator = '\\'; +#else + separator = '/'; +#endif + + filenameStart = extractFilename(path, separator); +#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */ + filenameStart = extractFilename(filenameStart, '/'); /* sometimes, '/' separator is also used on Windows (mingw+msys2) */ +#endif + + result = (char*) calloc(1, strlen(outDirName) + 1 + strlen(filenameStart) + suffixLen + 1); + if (!result) { + EXM_THROW(30, "zstd: FIO_createFilename_fromOutDir: %s", strerror(errno)); + } + + memcpy(result, outDirName, strlen(outDirName)); + if (outDirName[strlen(outDirName)-1] == separator) { + memcpy(result + strlen(outDirName), filenameStart, strlen(filenameStart)); + } else { + memcpy(result + strlen(outDirName), &separator, 1); + memcpy(result + strlen(outDirName) + 1, filenameStart, strlen(filenameStart)); + } + + return result; +} + +/* FIO_highbit64() : + * gives position of highest bit. + * note : only works for v > 0 ! 
+ */ +static unsigned FIO_highbit64(unsigned long long v) +{ + unsigned count = 0; + assert(v != 0); + v >>= 1; + while (v) { v >>= 1; count++; } + return count; +} + +static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs, + unsigned long long const dictSize, + unsigned long long const maxSrcFileSize) +{ + unsigned long long maxSize = MAX(prefs->memLimit, MAX(dictSize, maxSrcFileSize)); + unsigned const maxWindowSize = (1U << ZSTD_WINDOWLOG_MAX); + if (maxSize == UTIL_FILESIZE_UNKNOWN) + EXM_THROW(42, "Using --patch-from with stdin requires --stream-size"); + assert(maxSize != UTIL_FILESIZE_UNKNOWN); + if (maxSize > maxWindowSize) + EXM_THROW(42, "Can't handle files larger than %u GB\n", maxWindowSize/(1 GB)); + FIO_setMemLimit(prefs, (unsigned)maxSize); +} + +/* FIO_removeMultiFilesWarning() : + * Returns 1 if the console should abort, 0 if console should proceed. + * This function handles logic when processing multiple files with -o, displaying the appropriate warnings/prompts. + * + * If -f is specified, or there is just 1 file, zstd will always proceed as usual. + * If --rm is specified, there will be a prompt asking for user confirmation. + * If -f is specified with --rm, zstd will proceed as usual + * If -q is specified with --rm, zstd will abort pre-emptively + * If neither flag is specified, zstd will prompt the user for confirmation to proceed. + * If --rm is not specified, then zstd will print a warning to the user (which can be silenced with -q). + * However, if the output is stdout, we will always abort rather than displaying the warning prompt. + */ +static int FIO_removeMultiFilesWarning(FIO_ctx_t* const fCtx, const FIO_prefs_t* const prefs, const char* outFileName, int displayLevelCutoff) +{ + int error = 0; + if (fCtx->nbFilesTotal > 1 && !prefs->overwrite) { + if (g_display_prefs.displayLevel <= displayLevelCutoff) { + if (prefs->removeSrcFile) { + DISPLAYLEVEL(1, "zstd: Aborting... 
not deleting files and processing into dst: %s\n", outFileName); + error = 1; + } + } else { + if (!strcmp(outFileName, stdoutmark)) { + DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into stdout. \n"); + } else { + DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into a single output file: %s \n", outFileName); + } + DISPLAYLEVEL(2, "The concatenated output CANNOT regenerate the original directory tree. \n") + if (prefs->removeSrcFile) { + if (fCtx->hasStdoutOutput) { + DISPLAYLEVEL(1, "Aborting. Use -f if you really want to delete the files and output to stdout\n"); + error = 1; + } else { + error = g_display_prefs.displayLevel > displayLevelCutoff && UTIL_requireUserConfirmation("This is a destructive operation. Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput); + } + } + } + } + return error; +} + +#ifndef ZSTD_NOCOMPRESS + +/* ********************************************************************** + * Compression + ************************************************************************/ +typedef struct { + FILE* srcFile; + FILE* dstFile; + void* srcBuffer; + size_t srcBufferSize; + void* dstBuffer; + size_t dstBufferSize; + void* dictBuffer; + size_t dictBufferSize; + const char* dictFileName; + ZSTD_CStream* cctx; +} cRess_t; + +/** ZSTD_cycleLog() : + * condition for correct operation : hashLog > 1 */ +static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +{ + U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); + assert(hashLog > 1); + return hashLog - btScale; +} + +static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs, + ZSTD_compressionParameters* comprParams, + unsigned long long const dictSize, + unsigned long long const maxSrcFileSize, + int cLevel) +{ + unsigned const fileWindowLog = FIO_highbit64(maxSrcFileSize) + 1; + ZSTD_compressionParameters const cParams = ZSTD_getCParams(cLevel, (size_t)maxSrcFileSize, (size_t)dictSize); + 
FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, maxSrcFileSize); + if (fileWindowLog > ZSTD_WINDOWLOG_MAX) + DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n"); + comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog)); + if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) { + if (!prefs->ldmFlag) + DISPLAYLEVEL(1, "long mode automatically triggered\n"); + FIO_setLdmFlag(prefs, 1); + } + if (cParams.strategy >= ZSTD_btopt) { + DISPLAYLEVEL(1, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n"); + DISPLAYLEVEL(1, "- Use --single-thread mode in the zstd cli\n"); + DISPLAYLEVEL(1, "- Set a larger targetLength (eg. --zstd=targetLength=4096)\n"); + DISPLAYLEVEL(1, "- Set a larger chainLog (eg. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX); + DISPLAYLEVEL(1, "Also consider playing around with searchLog and hashLog\n"); + } +} + +static cRess_t FIO_createCResources(FIO_prefs_t* const prefs, + const char* dictFileName, unsigned long long const maxSrcFileSize, + int cLevel, ZSTD_compressionParameters comprParams) { + cRess_t ress; + memset(&ress, 0, sizeof(ress)); + + DISPLAYLEVEL(6, "FIO_createCResources \n"); + ress.cctx = ZSTD_createCCtx(); + if (ress.cctx == NULL) + EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx", + strerror(errno)); + ress.srcBufferSize = ZSTD_CStreamInSize(); + ress.srcBuffer = malloc(ress.srcBufferSize); + ress.dstBufferSize = ZSTD_CStreamOutSize(); + + /* need to update memLimit before calling createDictBuffer + * because of memLimit check inside it */ + if (prefs->patchFromMode) { + unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize; + FIO_adjustParamsForPatchFromMode(prefs, &comprParams, UTIL_getFileSize(dictFileName), ssSize > 0 ? 
ssSize : maxSrcFileSize, cLevel); + } + ress.dstBuffer = malloc(ress.dstBufferSize); + ress.dictBufferSize = FIO_createDictBuffer(&ress.dictBuffer, dictFileName, prefs); /* works with dictFileName==NULL */ + if (!ress.srcBuffer || !ress.dstBuffer) + EXM_THROW(31, "allocation error : not enough memory"); + + /* Advanced parameters, including dictionary */ + if (dictFileName && (ress.dictBuffer==NULL)) + EXM_THROW(32, "allocation error : can't create dictBuffer"); + ress.dictFileName = dictFileName; + + if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog) + comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT; + + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, prefs->contentSize) ); /* always enable content size when available (note: supposed to be default) */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) ); + /* compression level */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) ); + /* max compressed block size */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) ); + /* source size hint */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) ); + /* long distance matching */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) ); + if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) { + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) ); + } + if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) { + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) ); + } + CHECK( ZSTD_CCtx_setParameter(ress.cctx, 
ZSTD_c_useRowMatchFinder, prefs->useRowMatchFinder)); + /* compression parameters */ + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, (int)comprParams.strategy) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) ); + /* multi-threading */ +#ifdef ZSTD_MULTITHREAD + DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) ); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) ); + if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) { + DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog); + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) ); + } + CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) ); +#endif + /* dictionary */ + if (prefs->patchFromMode) { + CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dictBuffer, ress.dictBufferSize) ); + } else { + CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, ress.dictBuffer, ress.dictBufferSize) ); + } + + return ress; +} + +static void FIO_freeCResources(const cRess_t* const ress) +{ + free(ress->srcBuffer); + free(ress->dstBuffer); + free(ress->dictBuffer); + ZSTD_freeCStream(ress->cctx); /* never fails */ +} + + +#ifdef ZSTD_GZCOMPRESS +static unsigned long long 
+FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but not changed */ + const char* srcFileName, U64 const srcFileSize, + int compressionLevel, U64* readsize) +{ + unsigned long long inFileSize = 0, outFileSize = 0; + z_stream strm; + + if (compressionLevel > Z_BEST_COMPRESSION) + compressionLevel = Z_BEST_COMPRESSION; + + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + + { int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED, + 15 /* maxWindowLogSize */ + 16 /* gzip only */, + 8, Z_DEFAULT_STRATEGY); /* see http://www.zlib.net/manual.html */ + if (ret != Z_OK) { + EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret); + } } + + strm.next_in = 0; + strm.avail_in = 0; + strm.next_out = (Bytef*)ress->dstBuffer; + strm.avail_out = (uInt)ress->dstBufferSize; + + while (1) { + int ret; + if (strm.avail_in == 0) { + size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile); + if (inSize == 0) break; + inFileSize += inSize; + strm.next_in = (z_const unsigned char*)ress->srcBuffer; + strm.avail_in = (uInt)inSize; + } + ret = deflate(&strm, Z_NO_FLUSH); + if (ret != Z_OK) + EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret); + { size_t const cSize = ress->dstBufferSize - strm.avail_out; + if (cSize) { + if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize) + EXM_THROW(73, "Write error : cannot write to output file : %s ", strerror(errno)); + outFileSize += cSize; + strm.next_out = (Bytef*)ress->dstBuffer; + strm.avail_out = (uInt)ress->dstBufferSize; + } } + if (srcFileSize == UTIL_FILESIZE_UNKNOWN) { + DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", + (unsigned)(inFileSize>>20), + (double)outFileSize/inFileSize*100) + } else { + DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%% ", + (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), + (double)outFileSize/inFileSize*100); + } } + + while (1) { + int const ret = deflate(&strm, Z_FINISH); + { size_t 
const cSize = ress->dstBufferSize - strm.avail_out; + if (cSize) { + if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize) + EXM_THROW(75, "Write error : %s ", strerror(errno)); + outFileSize += cSize; + strm.next_out = (Bytef*)ress->dstBuffer; + strm.avail_out = (uInt)ress->dstBufferSize; + } } + if (ret == Z_STREAM_END) break; + if (ret != Z_BUF_ERROR) + EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret); + } + + { int const ret = deflateEnd(&strm); + if (ret != Z_OK) { + EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret); + } } + *readsize = inFileSize; + return outFileSize; +} +#endif + + +#ifdef ZSTD_LZMACOMPRESS +static unsigned long long +FIO_compressLzmaFrame(cRess_t* ress, + const char* srcFileName, U64 const srcFileSize, + int compressionLevel, U64* readsize, int plain_lzma) +{ + unsigned long long inFileSize = 0, outFileSize = 0; + lzma_stream strm = LZMA_STREAM_INIT; + lzma_action action = LZMA_RUN; + lzma_ret ret; + + if (compressionLevel < 0) compressionLevel = 0; + if (compressionLevel > 9) compressionLevel = 9; + + if (plain_lzma) { + lzma_options_lzma opt_lzma; + if (lzma_lzma_preset(&opt_lzma, compressionLevel)) + EXM_THROW(81, "zstd: %s: lzma_lzma_preset error", srcFileName); + ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */ + if (ret != LZMA_OK) + EXM_THROW(82, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret); + } else { + ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */ + if (ret != LZMA_OK) + EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret); + } + + strm.next_in = 0; + strm.avail_in = 0; + strm.next_out = (BYTE*)ress->dstBuffer; + strm.avail_out = ress->dstBufferSize; + + while (1) { + if (strm.avail_in == 0) { + size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile); + if (inSize == 0) action = LZMA_FINISH; + inFileSize += inSize; + strm.next_in = (BYTE const*)ress->srcBuffer; + strm.avail_in = inSize; + } + 
+ ret = lzma_code(&strm, action); + + if (ret != LZMA_OK && ret != LZMA_STREAM_END) + EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret); + { size_t const compBytes = ress->dstBufferSize - strm.avail_out; + if (compBytes) { + if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes) + EXM_THROW(85, "Write error : %s", strerror(errno)); + outFileSize += compBytes; + strm.next_out = (BYTE*)ress->dstBuffer; + strm.avail_out = ress->dstBufferSize; + } } + if (srcFileSize == UTIL_FILESIZE_UNKNOWN) + DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", + (unsigned)(inFileSize>>20), + (double)outFileSize/inFileSize*100) + else + DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", + (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), + (double)outFileSize/inFileSize*100); + if (ret == LZMA_STREAM_END) break; + } + + lzma_end(&strm); + *readsize = inFileSize; + + return outFileSize; +} +#endif + +#ifdef ZSTD_LZ4COMPRESS + +#if LZ4_VERSION_NUMBER <= 10600 +#define LZ4F_blockLinked blockLinked +#define LZ4F_max64KB max64KB +#endif + +static int FIO_LZ4_GetBlockSize_FromBlockId (int id) { return (1 << (8 + (2 * id))); } + +static unsigned long long +FIO_compressLz4Frame(cRess_t* ress, + const char* srcFileName, U64 const srcFileSize, + int compressionLevel, int checksumFlag, + U64* readsize) +{ + const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB); + unsigned long long inFileSize = 0, outFileSize = 0; + + LZ4F_preferences_t prefs; + LZ4F_compressionContext_t ctx; + + LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION); + if (LZ4F_isError(errorCode)) + EXM_THROW(31, "zstd: failed to create lz4 compression context"); + + memset(&prefs, 0, sizeof(prefs)); + + assert(blockSize <= ress->srcBufferSize); + + prefs.autoFlush = 1; + prefs.compressionLevel = compressionLevel; + prefs.frameInfo.blockMode = LZ4F_blockLinked; + prefs.frameInfo.blockSizeID = LZ4F_max64KB; + 
prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)checksumFlag; +#if LZ4_VERSION_NUMBER >= 10600 + prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize; +#endif + assert(LZ4F_compressBound(blockSize, &prefs) <= ress->dstBufferSize); + + { + size_t readSize; + size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs); + if (LZ4F_isError(headerSize)) + EXM_THROW(33, "File header generation failed : %s", + LZ4F_getErrorName(headerSize)); + if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize) + EXM_THROW(34, "Write error : %s (cannot write header)", strerror(errno)); + outFileSize += headerSize; + + /* Read first block */ + readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile); + inFileSize += readSize; + + /* Main Loop */ + while (readSize>0) { + size_t const outSize = LZ4F_compressUpdate(ctx, + ress->dstBuffer, ress->dstBufferSize, + ress->srcBuffer, readSize, NULL); + if (LZ4F_isError(outSize)) + EXM_THROW(35, "zstd: %s: lz4 compression failed : %s", + srcFileName, LZ4F_getErrorName(outSize)); + outFileSize += outSize; + if (srcFileSize == UTIL_FILESIZE_UNKNOWN) { + DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%", + (unsigned)(inFileSize>>20), + (double)outFileSize/inFileSize*100) + } else { + DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%", + (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20), + (double)outFileSize/inFileSize*100); + } + + /* Write Block */ + { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile); + if (sizeCheck != outSize) + EXM_THROW(36, "Write error : %s", strerror(errno)); + } + + /* Read next block */ + readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile); + inFileSize += readSize; + } + if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName); + + /* End of Stream mark */ + headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL); + if 
(LZ4F_isError(headerSize)) + EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s", + srcFileName, LZ4F_getErrorName(headerSize)); + + { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile); + if (sizeCheck != headerSize) + EXM_THROW(39, "Write error : %s (cannot write end of stream)", + strerror(errno)); + } + outFileSize += headerSize; + } + + *readsize = inFileSize; + LZ4F_freeCompressionContext(ctx); + + return outFileSize; +} +#endif + + +static unsigned long long +FIO_compressZstdFrame(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + const cRess_t* ressPtr, + const char* srcFileName, U64 fileSize, + int compressionLevel, U64* readsize) +{ + cRess_t const ress = *ressPtr; + FILE* const srcFile = ress.srcFile; + FILE* const dstFile = ress.dstFile; + U64 compressedfilesize = 0; + ZSTD_EndDirective directive = ZSTD_e_continue; + U64 pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + + /* stats */ + ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 }; + ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 }; + typedef enum { noChange, slower, faster } speedChange_e; + speedChange_e speedChange = noChange; + unsigned flushWaiting = 0; + unsigned inputPresented = 0; + unsigned inputBlocked = 0; + unsigned lastJobID = 0; + UTIL_HumanReadableSize_t const file_hrs = UTIL_makeHumanReadableSize(fileSize); + + DISPLAYLEVEL(6, "compression using zstd format \n"); + + /* init */ + if (fileSize != UTIL_FILESIZE_UNKNOWN) { + pledgedSrcSize = fileSize; + CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize)); + } else if (prefs->streamSrcSize > 0) { + /* unknown source size; use the declared stream size */ + pledgedSrcSize = prefs->streamSrcSize; + CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) ); + } + + { + int windowLog; + UTIL_HumanReadableSize_t windowSize; + CHECK(ZSTD_CCtx_getParameter(ress.cctx, ZSTD_c_windowLog, &windowLog)); + if (windowLog == 0) { + const ZSTD_compressionParameters 
cParams = ZSTD_getCParams(compressionLevel, fileSize, 0); + windowLog = cParams.windowLog; + } + windowSize = UTIL_makeHumanReadableSize(MAX(1ULL, MIN(1ULL << windowLog, pledgedSrcSize))); + DISPLAYLEVEL(4, "Decompression will require %.*f%s of memory\n", windowSize.precision, windowSize.value, windowSize.suffix); + } + (void)srcFileName; + + /* Main compression loop */ + do { + size_t stillToFlush; + /* Fill input Buffer */ + size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile); + ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 }; + DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize); + *readsize += inSize; + + if ((inSize == 0) || (*readsize == fileSize)) + directive = ZSTD_e_end; + + stillToFlush = 1; + while ((inBuff.pos != inBuff.size) /* input buffer must be entirely ingested */ + || (directive == ZSTD_e_end && stillToFlush != 0) ) { + + size_t const oldIPos = inBuff.pos; + ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 }; + size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx); + CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive)); + + /* count stats */ + inputPresented++; + if (oldIPos == inBuff.pos) inputBlocked++; /* input buffer is full and can't take any more : input speed is faster than consumption rate */ + if (!toFlushNow) flushWaiting = 1; + + /* Write compressed stream */ + DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n", + (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos); + if (outBuff.pos) { + size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile); + if (sizeCheck != outBuff.pos) + EXM_THROW(25, "Write error : %s (cannot write compressed block)", + strerror(errno)); + compressedfilesize += outBuff.pos; + } + + /* display notification; and adapt compression level */ + if (READY_FOR_UPDATE()) { + ZSTD_frameProgression const zfp = 
ZSTD_getFrameProgression(ress.cctx); + double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100; + UTIL_HumanReadableSize_t const buffered_hrs = UTIL_makeHumanReadableSize(zfp.ingested - zfp.consumed); + UTIL_HumanReadableSize_t const consumed_hrs = UTIL_makeHumanReadableSize(zfp.consumed); + UTIL_HumanReadableSize_t const produced_hrs = UTIL_makeHumanReadableSize(zfp.produced); + + /* display progress notifications */ + if (g_display_prefs.displayLevel >= 3) { + DISPLAYUPDATE(3, "\r(L%i) Buffered :%6.*f%4s - Consumed :%6.*f%4s - Compressed :%6.*f%4s => %.2f%% ", + compressionLevel, + buffered_hrs.precision, buffered_hrs.value, buffered_hrs.suffix, + consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix, + produced_hrs.precision, produced_hrs.value, produced_hrs.suffix, + cShare ); + } else if (g_display_prefs.displayLevel >= 2 || g_display_prefs.progressSetting == FIO_ps_always) { + /* Require level 2 or forcibly displayed progress counter for summarized updates */ + DISPLAYLEVEL(1, "\r%79s\r", ""); /* Clear out the current displayed line */ + if (fCtx->nbFilesTotal > 1) { + size_t srcFileNameSize = strlen(srcFileName); + /* Ensure that the string we print is roughly the same size each time */ + if (srcFileNameSize > 18) { + const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15; + DISPLAYLEVEL(1, "Compress: %u/%u files. Current: ...%s ", + fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName); + } else { + DISPLAYLEVEL(1, "Compress: %u/%u files. 
Current: %*s ", + fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName); + } + } + DISPLAYLEVEL(1, "Read:%6.*f%4s ", consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix); + if (fileSize != UTIL_FILESIZE_UNKNOWN) + DISPLAYLEVEL(2, "/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix); + DISPLAYLEVEL(1, " ==> %2.f%%", cShare); + DELAY_NEXT_UPDATE(); + } + + /* adaptive mode : statistics measurement and speed correction */ + if (prefs->adaptiveMode) { + + /* check output speed */ + if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */ + + unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced; + unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed; + assert(zfp.produced >= previous_zfp_update.produced); + assert(prefs->nbWorkers >= 1); + + /* test if compression is blocked + * either because output is slow and all buffers are full + * or because input is slow and no job can start while waiting for at least one buffer to be filled. 
+ * note : exclude starting part, since currentJobID > 1 */ + if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/ + && (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */ + ) { + DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n") + speedChange = slower; + } + + previous_zfp_update = zfp; + + if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */ + && (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */ + ) { + DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed); + speedChange = slower; + } + flushWaiting = 0; + } + + /* course correct only if there is at least one new job completed */ + if (zfp.currentJobID > lastJobID) { + DISPLAYLEVEL(6, "compression level adaptation check \n") + + /* check input speed */ + if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */ + if (inputBlocked <= 0) { + DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n"); + speedChange = slower; + } else if (speedChange == noChange) { + unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested; + unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed; + unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced; + unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed; + previous_zfp_correction = zfp; + assert(inputPresented > 0); + DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced 
\n", + inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100, + (unsigned)newlyIngested, (unsigned)newlyConsumed, + (unsigned)newlyFlushed, (unsigned)newlyProduced); + if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full : compression or output too slow */ + && (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */ + && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */ + ) { + DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n", + newlyIngested, newlyConsumed, newlyProduced, newlyFlushed); + speedChange = faster; + } + } + inputBlocked = 0; + inputPresented = 0; + } + + if (speedChange == slower) { + DISPLAYLEVEL(6, "slower speed , higher compression \n") + compressionLevel ++; + if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel(); + if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel; + compressionLevel += (compressionLevel == 0); /* skip 0 */ + ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); + } + if (speedChange == faster) { + DISPLAYLEVEL(6, "faster speed , lighter compression \n") + compressionLevel --; + if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel; + compressionLevel -= (compressionLevel == 0); /* skip 0 */ + ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel); + } + speedChange = noChange; + + lastJobID = zfp.currentJobID; + } /* if (zfp.currentJobID > lastJobID) */ + } /* if (g_adaptiveMode) */ + } /* if (READY_FOR_UPDATE()) */ + } /* while ((inBuff.pos != inBuff.size) */ + } while (directive != ZSTD_e_end); + + if (ferror(srcFile)) { + EXM_THROW(26, "Read error : I/O error"); + } + if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) { + EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B", + (unsigned long 
long)*readsize, (unsigned long long)fileSize); + } + + return compressedfilesize; +} + +/*! FIO_compressFilename_internal() : + * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened. + * @return : 0 : compression completed correctly, + * 1 : missing or pb opening srcFileName + */ +static int +FIO_compressFilename_internal(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + cRess_t ress, + const char* dstFileName, const char* srcFileName, + int compressionLevel) +{ + UTIL_time_t const timeStart = UTIL_getTime(); + clock_t const cpuStart = clock(); + U64 readsize = 0; + U64 compressedfilesize = 0; + U64 const fileSize = UTIL_getFileSize(srcFileName); + DISPLAYLEVEL(5, "%s: %llu bytes \n", srcFileName, (unsigned long long)fileSize); + + /* compression format selection */ + switch (prefs->compressionType) { + default: + case FIO_zstdCompression: + compressedfilesize = FIO_compressZstdFrame(fCtx, prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize); + break; + + case FIO_gzipCompression: +#ifdef ZSTD_GZCOMPRESS + compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize); +#else + (void)compressionLevel; + EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", + srcFileName); +#endif + break; + + case FIO_xzCompression: + case FIO_lzmaCompression: +#ifdef ZSTD_LZMACOMPRESS + compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_lzmaCompression); +#else + (void)compressionLevel; + EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n", + srcFileName); +#endif + break; + + case FIO_lz4Compression: +#ifdef ZSTD_LZ4COMPRESS + compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, prefs->checksumFlag, &readsize); +#else + (void)compressionLevel; + EXM_THROW(20, "zstd: 
%s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n", + srcFileName); +#endif + break; + } + + /* Status */ + fCtx->totalBytesInput += (size_t)readsize; + fCtx->totalBytesOutput += (size_t)compressedfilesize; + DISPLAYLEVEL(2, "\r%79s\r", ""); + if (g_display_prefs.displayLevel >= 2 && + !fCtx->hasStdoutOutput && + (g_display_prefs.displayLevel >= 3 || fCtx->nbFilesTotal <= 1)) { + UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) readsize); + UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) compressedfilesize); + if (readsize == 0) { + DISPLAYLEVEL(2,"%-20s : (%6.*f%4s => %6.*f%4s, %s) \n", + srcFileName, + hr_isize.precision, hr_isize.value, hr_isize.suffix, + hr_osize.precision, hr_osize.value, hr_osize.suffix, + dstFileName); + } else { + DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6.*f%4s => %6.*f%4s, %s) \n", + srcFileName, + (double)compressedfilesize / (double)readsize * 100, + hr_isize.precision, hr_isize.value, hr_isize.suffix, + hr_osize.precision, hr_osize.value, hr_osize.suffix, + dstFileName); + } + } + + /* Elapsed Time and CPU Load */ + { clock_t const cpuEnd = clock(); + double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC; + U64 const timeLength_ns = UTIL_clockSpanNano(timeStart); + double const timeLength_s = (double)timeLength_ns / 1000000000; + double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100; + DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec (cpu load : %.0f%%)\n", + srcFileName, timeLength_s, cpuLoad_pct); + } + return 0; +} + + +/*! FIO_compressFilename_dstFile() : + * open dstFileName, or pass-through if ress.dstFile != NULL, + * then start compression with FIO_compressFilename_internal(). + * Manages source removal (--rm) and file permissions transfer. + * note : ress.srcFile must be != NULL, + * so reach this function through FIO_compressFilename_srcFile(). 
+ * @return : 0 : compression completed correctly, + * 1 : pb + */ +static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + cRess_t ress, + const char* dstFileName, + const char* srcFileName, + int compressionLevel) +{ + int closeDstFile = 0; + int result; + stat_t statbuf; + int transferMTime = 0; + assert(ress.srcFile != NULL); + if (ress.dstFile == NULL) { + int dstFilePermissions = DEFAULT_FILE_PERMISSIONS; + if ( strcmp (srcFileName, stdinmark) + && strcmp (dstFileName, stdoutmark) + && UTIL_stat(srcFileName, &statbuf) + && UTIL_isRegularFileStat(&statbuf) ) { + dstFilePermissions = statbuf.st_mode; + transferMTime = 1; + } + + closeDstFile = 1; + DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName); + ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions); + if (ress.dstFile==NULL) return 1; /* could not open dstFileName */ + /* Must only be added after FIO_openDstFile() succeeds. + * Otherwise we may delete the destination file if it already exists, + * and the user presses Ctrl-C when asked if they wish to overwrite. 
+ */ + addHandler(dstFileName); + } + + result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); + + if (closeDstFile) { + FILE* const dstFile = ress.dstFile; + ress.dstFile = NULL; + + clearHandler(); + + DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName); + if (fclose(dstFile)) { /* error closing dstFile */ + DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); + result=1; + } + if (transferMTime) { + UTIL_utime(dstFileName, &statbuf); + } + if ( (result != 0) /* operation failure */ + && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */ + ) { + FIO_removeFile(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */ + } + } + + return result; +} + +/* List used to compare file extensions (used with --exclude-compressed flag) +* Different from the suffixList and should only apply to ZSTD compress operationResult +*/ +static const char *compressedFileExtensions[] = { + ZSTD_EXTENSION, + TZSTD_EXTENSION, + GZ_EXTENSION, + TGZ_EXTENSION, + LZMA_EXTENSION, + XZ_EXTENSION, + TXZ_EXTENSION, + LZ4_EXTENSION, + TLZ4_EXTENSION, + NULL +}; + +/*! 
FIO_compressFilename_srcFile() : + * @return : 0 : compression completed correctly, + * 1 : missing or pb opening srcFileName + */ +static int +FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + cRess_t ress, + const char* dstFileName, + const char* srcFileName, + int compressionLevel) +{ + int result; + DISPLAYLEVEL(6, "FIO_compressFilename_srcFile: %s \n", srcFileName); + + /* ensure src is not a directory */ + if (UTIL_isDirectory(srcFileName)) { + DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); + return 1; + } + + /* ensure src is not the same as dict (if present) */ + if (ress.dictFileName != NULL && UTIL_isSameFile(srcFileName, ress.dictFileName)) { + DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName); + return 1; + } + + /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used + * YES => ZSTD will skip compression of the file and will return 0. + * NO => ZSTD will resume with compress operation. + */ + if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) { + DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName); + return 0; + } + + ress.srcFile = FIO_openSrcFile(prefs, srcFileName); + if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */ + + result = FIO_compressFilename_dstFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); + + fclose(ress.srcFile); + ress.srcFile = NULL; + if ( prefs->removeSrcFile /* --rm */ + && result == 0 /* success */ + && strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */ + ) { + /* We must clear the handler, since after this point calling it would + * delete both the source and destination files. 
+ */ + clearHandler(); + if (FIO_removeFile(srcFileName)) + EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno)); + } + return result; +} + +static const char* checked_index(const char* options[], size_t length, size_t index) { + assert(index < length); + // Necessary to avoid warnings since -O3 will omit the above `assert` + (void) length; + return options[index]; +} + +#define INDEX(options, index) checked_index((options), sizeof(options) / sizeof(char*), (index)) + +void FIO_displayCompressionParameters(const FIO_prefs_t* prefs) { + static const char* formatOptions[5] = {ZSTD_EXTENSION, GZ_EXTENSION, XZ_EXTENSION, + LZMA_EXTENSION, LZ4_EXTENSION}; + static const char* sparseOptions[3] = {" --no-sparse", "", " --sparse"}; + static const char* checkSumOptions[3] = {" --no-check", "", " --check"}; + static const char* rowMatchFinderOptions[3] = {"", " --no-row-match-finder", " --row-match-finder"}; + static const char* compressLiteralsOptions[3] = {"", " --compress-literals", " --no-compress-literals"}; + + assert(g_display_prefs.displayLevel >= 4); + + DISPLAY("--format=%s", formatOptions[prefs->compressionType]); + DISPLAY("%s", INDEX(sparseOptions, prefs->sparseFileSupport)); + DISPLAY("%s", prefs->dictIDFlag ? "" : " --no-dictID"); + DISPLAY("%s", INDEX(checkSumOptions, prefs->checksumFlag)); + DISPLAY(" --block-size=%d", prefs->blockSize); + if (prefs->adaptiveMode) + DISPLAY(" --adapt=min=%d,max=%d", prefs->minAdaptLevel, prefs->maxAdaptLevel); + DISPLAY("%s", INDEX(rowMatchFinderOptions, prefs->useRowMatchFinder)); + DISPLAY("%s", prefs->rsyncable ? 
" --rsyncable" : ""); + if (prefs->streamSrcSize) + DISPLAY(" --stream-size=%u", (unsigned) prefs->streamSrcSize); + if (prefs->srcSizeHint) + DISPLAY(" --size-hint=%d", prefs->srcSizeHint); + if (prefs->targetCBlockSize) + DISPLAY(" --target-compressed-block-size=%u", (unsigned) prefs->targetCBlockSize); + DISPLAY("%s", INDEX(compressLiteralsOptions, prefs->literalCompressionMode)); + DISPLAY(" --memory=%u", prefs->memLimit ? prefs->memLimit : 128 MB); + DISPLAY(" --threads=%d", prefs->nbWorkers); + DISPLAY("%s", prefs->excludeCompressedFiles ? " --exclude-compressed" : ""); + DISPLAY(" --%scontent-size", prefs->contentSize ? "" : "no-"); + DISPLAY("\n"); +} + +#undef INDEX + +int FIO_compressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* dstFileName, + const char* srcFileName, const char* dictFileName, + int compressionLevel, ZSTD_compressionParameters comprParams) +{ + cRess_t const ress = FIO_createCResources(prefs, dictFileName, UTIL_getFileSize(srcFileName), compressionLevel, comprParams); + int const result = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); + +#define DISPLAY_LEVEL_DEFAULT 2 + + FIO_freeCResources(&ress); + return result; +} + +/* FIO_determineCompressedName() : + * create a destination filename for compressed srcFileName. + * @return a pointer to it. 
+ * This function never returns an error (it may abort() in case of pb) + */ +static const char* +FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix) +{ + static size_t dfnbCapacity = 0; + static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ + char* outDirFilename = NULL; + size_t sfnSize = strlen(srcFileName); + size_t const srcSuffixLen = strlen(suffix); + if (outDirName) { + outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen); + sfnSize = strlen(outDirFilename); + assert(outDirFilename != NULL); + } + + if (dfnbCapacity <= sfnSize+srcSuffixLen+1) { + /* resize buffer for dstName */ + free(dstFileNameBuffer); + dfnbCapacity = sfnSize + srcSuffixLen + 30; + dstFileNameBuffer = (char*)malloc(dfnbCapacity); + if (!dstFileNameBuffer) { + EXM_THROW(30, "zstd: %s", strerror(errno)); + } + } + assert(dstFileNameBuffer != NULL); + + if (outDirFilename) { + memcpy(dstFileNameBuffer, outDirFilename, sfnSize); + free(outDirFilename); + } else { + memcpy(dstFileNameBuffer, srcFileName, sfnSize); + } + memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */); + return dstFileNameBuffer; +} + +static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles) +{ + size_t i; + unsigned long long fileSize, maxFileSize = 0; + for (i = 0; i < nbFiles; i++) { + fileSize = UTIL_getFileSize(inFileNames[i]); + maxFileSize = fileSize > maxFileSize ? 
fileSize : maxFileSize; + } + return maxFileSize; +} + +/* FIO_compressMultipleFilenames() : + * compress nbFiles files + * into either one destination (outFileName), + * or into one file each (outFileName == NULL, but suffix != NULL), + * or into a destination folder (specified with -O) + */ +int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + const char** inFileNamesTable, + const char* outMirroredRootDirName, + const char* outDirName, + const char* outFileName, const char* suffix, + const char* dictFileName, int compressionLevel, + ZSTD_compressionParameters comprParams) +{ + int status; + int error = 0; + cRess_t ress = FIO_createCResources(prefs, dictFileName, + FIO_getLargestFileSize(inFileNamesTable, (unsigned)fCtx->nbFilesTotal), + compressionLevel, comprParams); + + /* init */ + assert(outFileName != NULL || suffix != NULL); + if (outFileName != NULL) { /* output into a single destination (stdout typically) */ + if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) { + FIO_freeCResources(&ress); + return 1; + } + ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS); + if (ress.dstFile == NULL) { /* could not open outFileName */ + error = 1; + } else { + for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) { + status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel); + if (!status) fCtx->nbFilesProcessed++; + error |= status; + } + if (fclose(ress.dstFile)) + EXM_THROW(29, "Write error (%s) : cannot properly close %s", + strerror(errno), outFileName); + ress.dstFile = NULL; + } + } else { + if (outMirroredRootDirName) + UTIL_mirrorSourceFilesDirectories(inFileNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName); + + for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) { + const char* const srcFileName = inFileNamesTable[fCtx->currFileIdx]; + 
const char* dstFileName = NULL; + if (outMirroredRootDirName) { + char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName); + if (validMirroredDirName) { + dstFileName = FIO_determineCompressedName(srcFileName, validMirroredDirName, suffix); + free(validMirroredDirName); + } else { + DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot compress '%s' into '%s' \n", srcFileName, outMirroredRootDirName); + error=1; + continue; + } + } else { + dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */ + } + status = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel); + if (!status) fCtx->nbFilesProcessed++; + error |= status; + } + + if (outDirName) + FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal); + } + + if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesInput != 0) { + UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesInput); + UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesOutput); + + DISPLAYLEVEL(2, "\r%79s\r", ""); + DISPLAYLEVEL(2, "%3d files compressed :%.2f%% (%6.*f%4s => %6.*f%4s)\n", + fCtx->nbFilesProcessed, + (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100, + hr_isize.precision, hr_isize.value, hr_isize.suffix, + hr_osize.precision, hr_osize.value, hr_osize.suffix); + } + + FIO_freeCResources(&ress); + return error; +} + +#endif /* #ifndef ZSTD_NOCOMPRESS */ + + + +#ifndef ZSTD_NODECOMPRESS + +/* ************************************************************************** + * Decompression + ***************************************************************************/ +typedef struct { + void* srcBuffer; + size_t srcBufferSize; + size_t srcBufferLoaded; + void* dstBuffer; + size_t dstBufferSize; + ZSTD_DStream* dctx; + FILE* dstFile; +} dRess_t; + +static dRess_t FIO_createDResources(FIO_prefs_t* 
const prefs, const char* dictFileName) +{ + dRess_t ress; + memset(&ress, 0, sizeof(ress)); + + if (prefs->patchFromMode) + FIO_adjustMemLimitForPatchFromMode(prefs, UTIL_getFileSize(dictFileName), 0 /* just use the dict size */); + + /* Allocation */ + ress.dctx = ZSTD_createDStream(); + if (ress.dctx==NULL) + EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno)); + CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) ); + CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag)); + + ress.srcBufferSize = ZSTD_DStreamInSize(); + ress.srcBuffer = malloc(ress.srcBufferSize); + ress.dstBufferSize = ZSTD_DStreamOutSize(); + ress.dstBuffer = malloc(ress.dstBufferSize); + if (!ress.srcBuffer || !ress.dstBuffer) + EXM_THROW(61, "Allocation error : not enough memory"); + + /* dictionary */ + { void* dictBuffer; + size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs); + CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) ); + free(dictBuffer); + } + + return ress; +} + +static void FIO_freeDResources(dRess_t ress) +{ + CHECK( ZSTD_freeDStream(ress.dctx) ); + free(ress.srcBuffer); + free(ress.dstBuffer); +} + + +/** FIO_fwriteSparse() : +* @return : storedSkips, +* argument for next call to FIO_fwriteSparse() or FIO_fwriteSparseEnd() */ +static unsigned +FIO_fwriteSparse(FILE* file, + const void* buffer, size_t bufferSize, + const FIO_prefs_t* const prefs, + unsigned storedSkips) +{ + const size_t* const bufferT = (const size_t*)buffer; /* Buffer is supposed malloc'ed, hence aligned on size_t */ + size_t bufferSizeT = bufferSize / sizeof(size_t); + const size_t* const bufferTEnd = bufferT + bufferSizeT; + const size_t* ptrT = bufferT; + static const size_t segmentSizeT = (32 KB) / sizeof(size_t); /* check every 32 KB */ + + if (prefs->testMode) return 0; /* do not output anything in test mode */ + + if (!prefs->sparseFileSupport) { /* normal write */ + size_t 
const sizeCheck = fwrite(buffer, 1, bufferSize, file); + if (sizeCheck != bufferSize) + EXM_THROW(70, "Write error : cannot write decoded block : %s", + strerror(errno)); + return 0; + } + + /* avoid int overflow */ + if (storedSkips > 1 GB) { + if (LONG_SEEK(file, 1 GB, SEEK_CUR) != 0) + EXM_THROW(91, "1 GB skip error (sparse file support)"); + storedSkips -= 1 GB; + } + + while (ptrT < bufferTEnd) { + size_t nb0T; + + /* adjust last segment if < 32 KB */ + size_t seg0SizeT = segmentSizeT; + if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT; + bufferSizeT -= seg0SizeT; + + /* count leading zeroes */ + for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ; + storedSkips += (unsigned)(nb0T * sizeof(size_t)); + + if (nb0T != seg0SizeT) { /* not all 0s */ + size_t const nbNon0ST = seg0SizeT - nb0T; + /* skip leading zeros */ + if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0) + EXM_THROW(92, "Sparse skip error ; try --no-sparse"); + storedSkips = 0; + /* write the rest */ + if (fwrite(ptrT + nb0T, sizeof(size_t), nbNon0ST, file) != nbNon0ST) + EXM_THROW(93, "Write error : cannot write decoded block : %s", + strerror(errno)); + } + ptrT += seg0SizeT; + } + + { static size_t const maskT = sizeof(size_t)-1; + if (bufferSize & maskT) { + /* size not multiple of sizeof(size_t) : implies end of block */ + const char* const restStart = (const char*)bufferTEnd; + const char* restPtr = restStart; + const char* const restEnd = (const char*)buffer + bufferSize; + assert(restEnd > restStart && restEnd < restStart + sizeof(size_t)); + for ( ; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ; + storedSkips += (unsigned) (restPtr - restStart); + if (restPtr != restEnd) { + /* not all remaining bytes are 0 */ + size_t const restSize = (size_t)(restEnd - restPtr); + if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0) + EXM_THROW(92, "Sparse skip error ; try --no-sparse"); + if (fwrite(restPtr, 1, restSize, file) != restSize) + EXM_THROW(95, "Write error : cannot write end 
of decoded block : %s", + strerror(errno)); + storedSkips = 0; + } } } + + return storedSkips; +} + +static void +FIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips) +{ + if (prefs->testMode) assert(storedSkips == 0); + if (storedSkips>0) { + assert(prefs->sparseFileSupport > 0); /* storedSkips>0 implies sparse support is enabled */ + (void)prefs; /* assert can be disabled, in which case prefs becomes unused */ + if (LONG_SEEK(file, storedSkips-1, SEEK_CUR) != 0) + EXM_THROW(69, "Final skip error (sparse file support)"); + /* last zero must be explicitly written, + * so that skipped ones get implicitly translated as zero by FS */ + { const char lastZeroByte[1] = { 0 }; + if (fwrite(lastZeroByte, 1, 1, file) != 1) + EXM_THROW(69, "Write error : cannot write last zero : %s", strerror(errno)); + } } +} + + +/** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode + @return : 0 (no error) */ +static int FIO_passThrough(const FIO_prefs_t* const prefs, + FILE* foutput, FILE* finput, + void* buffer, size_t bufferSize, + size_t alreadyLoaded) +{ + size_t const blockSize = MIN(64 KB, bufferSize); + size_t readFromInput; + unsigned storedSkips = 0; + + /* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */ + { size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput); + if (sizeCheck != alreadyLoaded) { + DISPLAYLEVEL(1, "Pass-through write error : %s\n", strerror(errno)); + return 1; + } } + + do { + readFromInput = fread(buffer, 1, blockSize, finput); + storedSkips = FIO_fwriteSparse(foutput, buffer, readFromInput, prefs, storedSkips); + } while (readFromInput == blockSize); + if (ferror(finput)) { + DISPLAYLEVEL(1, "Pass-through read error : %s\n", strerror(errno)); + return 1; + } + assert(feof(finput)); + + FIO_fwriteSparseEnd(prefs, foutput, storedSkips); + return 0; +} + +/* FIO_zstdErrorHelp() : + * detailed error message when requested window size is too large 
*/ +static void +FIO_zstdErrorHelp(const FIO_prefs_t* const prefs, + const dRess_t* ress, + size_t err, const char* srcFileName) +{ + ZSTD_frameHeader header; + + /* Help message only for one specific error */ + if (ZSTD_getErrorCode(err) != ZSTD_error_frameParameter_windowTooLarge) + return; + + /* Try to decode the frame header */ + err = ZSTD_getFrameHeader(&header, ress->srcBuffer, ress->srcBufferLoaded); + if (err == 0) { + unsigned long long const windowSize = header.windowSize; + unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0); + assert(prefs->memLimit > 0); + DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u \n", + srcFileName, windowSize, prefs->memLimit); + if (windowLog <= ZSTD_WINDOWLOG_MAX) { + unsigned const windowMB = (unsigned)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0)); + assert(windowSize < (U64)(1ULL << 52)); /* ensure now overflow for windowMB */ + DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB \n", + srcFileName, windowLog, windowMB); + return; + } } + DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u; not supported \n", + srcFileName, ZSTD_WINDOWLOG_MAX); +} + +/** FIO_decompressFrame() : + * @return : size of decoded zstd frame, or an error code + */ +#define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2)) +static unsigned long long +FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput, + const FIO_prefs_t* const prefs, + const char* srcFileName, + U64 alreadyDecoded) /* for multi-frames streams */ +{ + U64 frameSize = 0; + U32 storedSkips = 0; + + /* display last 20 characters only */ + { size_t const srcFileLength = strlen(srcFileName); + if (srcFileLength>20) srcFileName += srcFileLength-20; + } + + ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only); + + /* Header loading : ensures ZSTD_getFrameHeader() will succeed */ + { size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX; + if (ress->srcBufferLoaded < toDecode) { + 
size_t const toRead = toDecode - ress->srcBufferLoaded; + void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded; + ress->srcBufferLoaded += fread(startPosition, 1, toRead, finput); + } } + + /* Main decompression Loop */ + while (1) { + ZSTD_inBuffer inBuff = { ress->srcBuffer, ress->srcBufferLoaded, 0 }; + ZSTD_outBuffer outBuff= { ress->dstBuffer, ress->dstBufferSize, 0 }; + size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff); + const int displayLevel = (g_display_prefs.progressSetting == FIO_ps_always) ? 1 : 2; + UTIL_HumanReadableSize_t const hrs = UTIL_makeHumanReadableSize(alreadyDecoded+frameSize); + if (ZSTD_isError(readSizeHint)) { + DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n", + srcFileName, ZSTD_getErrorName(readSizeHint)); + FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName); + return FIO_ERROR_FRAME_DECODING; + } + + /* Write block */ + storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, prefs, storedSkips); + frameSize += outBuff.pos; + if (fCtx->nbFilesTotal > 1) { + size_t srcFileNameSize = strlen(srcFileName); + if (srcFileNameSize > 18) { + const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15; + DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s... ", + fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix); + } else { + DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: %s : %.*f%s... ", + fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, hrs.precision, hrs.value, hrs.suffix); + } + } else { + DISPLAYUPDATE(displayLevel, "\r%-20.20s : %.*f%s... 
", + srcFileName, hrs.precision, hrs.value, hrs.suffix); + } + + if (inBuff.pos > 0) { + memmove(ress->srcBuffer, (char*)ress->srcBuffer + inBuff.pos, inBuff.size - inBuff.pos); + ress->srcBufferLoaded -= inBuff.pos; + } + + if (readSizeHint == 0) break; /* end of frame */ + + /* Fill input buffer */ + { size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize); /* support large skippable frames */ + if (ress->srcBufferLoaded < toDecode) { + size_t const toRead = toDecode - ress->srcBufferLoaded; /* > 0 */ + void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded; + size_t const readSize = fread(startPosition, 1, toRead, finput); + if (readSize==0) { + DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n", + srcFileName); + return FIO_ERROR_FRAME_DECODING; + } + ress->srcBufferLoaded += readSize; + } } } + + FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips); + + return frameSize; +} + + +#ifdef ZSTD_GZDECOMPRESS +static unsigned long long +FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile, + const FIO_prefs_t* const prefs, + const char* srcFileName) +{ + unsigned long long outFileSize = 0; + z_stream strm; + int flush = Z_NO_FLUSH; + int decodingError = 0; + unsigned storedSkips = 0; + + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.next_in = 0; + strm.avail_in = 0; + /* see http://www.zlib.net/manual.html */ + if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK) + return FIO_ERROR_FRAME_DECODING; + + strm.next_out = (Bytef*)ress->dstBuffer; + strm.avail_out = (uInt)ress->dstBufferSize; + strm.avail_in = (uInt)ress->srcBufferLoaded; + strm.next_in = (z_const unsigned char*)ress->srcBuffer; + + for ( ; ; ) { + int ret; + if (strm.avail_in == 0) { + ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile); + if (ress->srcBufferLoaded == 0) flush = Z_FINISH; + strm.next_in = (z_const unsigned char*)ress->srcBuffer; + strm.avail_in = 
(uInt)ress->srcBufferLoaded; + } + ret = inflate(&strm, flush); + if (ret == Z_BUF_ERROR) { + DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName); + decodingError = 1; break; + } + if (ret != Z_OK && ret != Z_STREAM_END) { + DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret); + decodingError = 1; break; + } + { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; + if (decompBytes) { + storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decompBytes, prefs, storedSkips); + outFileSize += decompBytes; + strm.next_out = (Bytef*)ress->dstBuffer; + strm.avail_out = (uInt)ress->dstBufferSize; + } + } + if (ret == Z_STREAM_END) break; + } + + if (strm.avail_in > 0) + memmove(ress->srcBuffer, strm.next_in, strm.avail_in); + ress->srcBufferLoaded = strm.avail_in; + if ( (inflateEnd(&strm) != Z_OK) /* release resources ; error detected */ + && (decodingError==0) ) { + DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName); + decodingError = 1; + } + FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips); + return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize; +} +#endif + + +#ifdef ZSTD_LZMADECOMPRESS +static unsigned long long +FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile, + const FIO_prefs_t* const prefs, + const char* srcFileName, int plain_lzma) +{ + unsigned long long outFileSize = 0; + lzma_stream strm = LZMA_STREAM_INIT; + lzma_action action = LZMA_RUN; + lzma_ret initRet; + int decodingError = 0; + unsigned storedSkips = 0; + + strm.next_in = 0; + strm.avail_in = 0; + if (plain_lzma) { + initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */ + } else { + initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */ + } + + if (initRet != LZMA_OK) { + DISPLAYLEVEL(1, "zstd: %s: %s error %d \n", + plain_lzma ? 
"lzma_alone_decoder" : "lzma_stream_decoder", + srcFileName, initRet); + return FIO_ERROR_FRAME_DECODING; + } + + strm.next_out = (BYTE*)ress->dstBuffer; + strm.avail_out = ress->dstBufferSize; + strm.next_in = (BYTE const*)ress->srcBuffer; + strm.avail_in = ress->srcBufferLoaded; + + for ( ; ; ) { + lzma_ret ret; + if (strm.avail_in == 0) { + ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile); + if (ress->srcBufferLoaded == 0) action = LZMA_FINISH; + strm.next_in = (BYTE const*)ress->srcBuffer; + strm.avail_in = ress->srcBufferLoaded; + } + ret = lzma_code(&strm, action); + + if (ret == LZMA_BUF_ERROR) { + DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName); + decodingError = 1; break; + } + if (ret != LZMA_OK && ret != LZMA_STREAM_END) { + DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n", + srcFileName, ret); + decodingError = 1; break; + } + { size_t const decompBytes = ress->dstBufferSize - strm.avail_out; + if (decompBytes) { + storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decompBytes, prefs, storedSkips); + outFileSize += decompBytes; + strm.next_out = (BYTE*)ress->dstBuffer; + strm.avail_out = ress->dstBufferSize; + } } + if (ret == LZMA_STREAM_END) break; + } + + if (strm.avail_in > 0) + memmove(ress->srcBuffer, strm.next_in, strm.avail_in); + ress->srcBufferLoaded = strm.avail_in; + lzma_end(&strm); + FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips); + return decodingError ? 
FIO_ERROR_FRAME_DECODING : outFileSize; +} +#endif + +#ifdef ZSTD_LZ4DECOMPRESS +static unsigned long long +FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile, + const FIO_prefs_t* const prefs, + const char* srcFileName) +{ + unsigned long long filesize = 0; + LZ4F_errorCode_t nextToLoad; + LZ4F_decompressionContext_t dCtx; + LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION); + int decodingError = 0; + unsigned storedSkips = 0; + + if (LZ4F_isError(errorCode)) { + DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n"); + return FIO_ERROR_FRAME_DECODING; + } + + /* Init feed with magic number (already consumed from FILE* sFile) */ + { size_t inSize = 4; + size_t outSize= 0; + MEM_writeLE32(ress->srcBuffer, LZ4_MAGICNUMBER); + nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &outSize, ress->srcBuffer, &inSize, NULL); + if (LZ4F_isError(nextToLoad)) { + DISPLAYLEVEL(1, "zstd: %s: lz4 header error : %s \n", + srcFileName, LZ4F_getErrorName(nextToLoad)); + LZ4F_freeDecompressionContext(dCtx); + return FIO_ERROR_FRAME_DECODING; + } } + + /* Main Loop */ + for (;nextToLoad;) { + size_t readSize; + size_t pos = 0; + size_t decodedBytes = ress->dstBufferSize; + + /* Read input */ + if (nextToLoad > ress->srcBufferSize) nextToLoad = ress->srcBufferSize; + readSize = fread(ress->srcBuffer, 1, nextToLoad, srcFile); + if (!readSize) break; /* reached end of file or stream */ + + while ((pos < readSize) || (decodedBytes == ress->dstBufferSize)) { /* still to read, or still to flush */ + /* Decode Input (at least partially) */ + size_t remaining = readSize - pos; + decodedBytes = ress->dstBufferSize; + nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &decodedBytes, (char*)(ress->srcBuffer)+pos, &remaining, NULL); + if (LZ4F_isError(nextToLoad)) { + DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n", + srcFileName, LZ4F_getErrorName(nextToLoad)); + decodingError = 1; nextToLoad = 0; break; + } + pos += 
remaining; + + /* Write Block */ + if (decodedBytes) { + UTIL_HumanReadableSize_t hrs; + storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decodedBytes, prefs, storedSkips); + filesize += decodedBytes; + hrs = UTIL_makeHumanReadableSize(filesize); + DISPLAYUPDATE(2, "\rDecompressed : %.*f%s ", hrs.precision, hrs.value, hrs.suffix); + } + + if (!nextToLoad) break; + } + } + /* can be out because readSize == 0, which could be an fread() error */ + if (ferror(srcFile)) { + DISPLAYLEVEL(1, "zstd: %s: read error \n", srcFileName); + decodingError=1; + } + + if (nextToLoad!=0) { + DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName); + decodingError=1; + } + + LZ4F_freeDecompressionContext(dCtx); + ress->srcBufferLoaded = 0; /* LZ4F will reach exact frame boundary */ + FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips); + + return decodingError ? FIO_ERROR_FRAME_DECODING : filesize; +} +#endif + + + +/** FIO_decompressFrames() : + * Find and decode frames inside srcFile + * srcFile presumed opened and valid + * @return : 0 : OK + * 1 : error + */ +static int FIO_decompressFrames(FIO_ctx_t* const fCtx, + dRess_t ress, FILE* srcFile, + const FIO_prefs_t* const prefs, + const char* dstFileName, const char* srcFileName) +{ + unsigned readSomething = 0; + unsigned long long filesize = 0; + assert(srcFile != NULL); + + /* for each frame */ + for ( ; ; ) { + /* check magic number -> version */ + size_t const toRead = 4; + const BYTE* const buf = (const BYTE*)ress.srcBuffer; + if (ress.srcBufferLoaded < toRead) /* load up to 4 bytes for header */ + ress.srcBufferLoaded += fread((char*)ress.srcBuffer + ress.srcBufferLoaded, + (size_t)1, toRead - ress.srcBufferLoaded, srcFile); + if (ress.srcBufferLoaded==0) { + if (readSomething==0) { /* srcFile is empty (which is invalid) */ + DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName); + return 1; + } /* else, just reached frame boundary */ + break; /* no more input */ + } + readSomething 
= 1; /* there is at least 1 byte in srcFile */ + if (ress.srcBufferLoaded < toRead) { + DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName); + return 1; + } + if (ZSTD_isFrame(buf, ress.srcBufferLoaded)) { + unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, srcFile, prefs, srcFileName, filesize); + if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; + filesize += frameSize; + } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */ +#ifdef ZSTD_GZDECOMPRESS + unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFile, prefs, srcFileName); + if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; + filesize += frameSize; +#else + DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName); + return 1; +#endif + } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */ + || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */ +#ifdef ZSTD_LZMADECOMPRESS + unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFile, prefs, srcFileName, buf[0] != 0xFD); + if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; + filesize += frameSize; +#else + DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName); + return 1; +#endif + } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) { +#ifdef ZSTD_LZ4DECOMPRESS + unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFile, prefs, srcFileName); + if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; + filesize += frameSize; +#else + DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName); + return 1; +#endif + } else if ((prefs->overwrite) && !strcmp (dstFileName, stdoutmark)) { /* pass-through mode */ + return FIO_passThrough(prefs, + ress.dstFile, srcFile, + ress.srcBuffer, ress.srcBufferSize, + ress.srcBufferLoaded); + } else 
{ + DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName); + return 1; + } } /* for each frame */ + + /* Final Status */ + fCtx->totalBytesOutput += (size_t)filesize; + DISPLAYLEVEL(2, "\r%79s\r", ""); + /* No status message in pipe mode (stdin - stdout) or multi-files mode */ + if ((g_display_prefs.displayLevel >= 2 && fCtx->nbFilesTotal <= 1) || + g_display_prefs.displayLevel >= 3 || + g_display_prefs.progressSetting == FIO_ps_always) { + DISPLAYLEVEL(1, "\r%-20s: %llu bytes \n", srcFileName, filesize); + } + + return 0; +} + +/** FIO_decompressDstFile() : + open `dstFileName`, + or path-through if ress.dstFile is already != 0, + then start decompression process (FIO_decompressFrames()). + @return : 0 : OK + 1 : operation aborted +*/ +static int FIO_decompressDstFile(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + dRess_t ress, FILE* srcFile, + const char* dstFileName, const char* srcFileName) +{ + int result; + stat_t statbuf; + int releaseDstFile = 0; + int transferMTime = 0; + + if ((ress.dstFile == NULL) && (prefs->testMode==0)) { + int dstFilePermissions = DEFAULT_FILE_PERMISSIONS; + if ( strcmp(srcFileName, stdinmark) /* special case : don't transfer permissions from stdin */ + && strcmp(dstFileName, stdoutmark) + && UTIL_stat(srcFileName, &statbuf) + && UTIL_isRegularFileStat(&statbuf) ) { + dstFilePermissions = statbuf.st_mode; + transferMTime = 1; + } + + releaseDstFile = 1; + + ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions); + if (ress.dstFile==NULL) return 1; + + /* Must only be added after FIO_openDstFile() succeeds. + * Otherwise we may delete the destination file if it already exists, + * and the user presses Ctrl-C when asked if they wish to overwrite. 
+ */ + addHandler(dstFileName); + } + + result = FIO_decompressFrames(fCtx, ress, srcFile, prefs, dstFileName, srcFileName); + + if (releaseDstFile) { + FILE* const dstFile = ress.dstFile; + clearHandler(); + ress.dstFile = NULL; + if (fclose(dstFile)) { + DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno)); + result = 1; + } + + if (transferMTime) { + UTIL_utime(dstFileName, &statbuf); + } + + if ( (result != 0) /* operation failure */ + && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */ + ) { + FIO_removeFile(dstFileName); /* remove decompression artefact; note: don't do anything special if remove() fails */ + } + } + + return result; +} + + +/** FIO_decompressSrcFile() : + Open `srcFileName`, transfer control to decompressDstFile() + @return : 0 : OK + 1 : error +*/ +static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName) +{ + FILE* srcFile; + int result; + + if (UTIL_isDirectory(srcFileName)) { + DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName); + return 1; + } + + srcFile = FIO_openSrcFile(prefs, srcFileName); + if (srcFile==NULL) return 1; + ress.srcBufferLoaded = 0; + + result = FIO_decompressDstFile(fCtx, prefs, ress, srcFile, dstFileName, srcFileName); + + /* Close file */ + if (fclose(srcFile)) { + DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); /* error should not happen */ + return 1; + } + if ( prefs->removeSrcFile /* --rm */ + && (result==0) /* decompression successful */ + && strcmp(srcFileName, stdinmark) ) /* not stdin */ { + /* We must clear the handler, since after this point calling it would + * delete both the source and destination files. 
+ */ + clearHandler(); + if (FIO_removeFile(srcFileName)) { + /* failed to remove src file */ + DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); + return 1; + } } + return result; +} + + + +int FIO_decompressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, + const char* dstFileName, const char* srcFileName, + const char* dictFileName) +{ + dRess_t const ress = FIO_createDResources(prefs, dictFileName); + + int const decodingError = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName); + + FIO_freeDResources(ress); + return decodingError; +} + +static const char *suffixList[] = { + ZSTD_EXTENSION, + TZSTD_EXTENSION, +#ifndef ZSTD_NODECOMPRESS + ZSTD_ALT_EXTENSION, +#endif +#ifdef ZSTD_GZDECOMPRESS + GZ_EXTENSION, + TGZ_EXTENSION, +#endif +#ifdef ZSTD_LZMADECOMPRESS + LZMA_EXTENSION, + XZ_EXTENSION, + TXZ_EXTENSION, +#endif +#ifdef ZSTD_LZ4DECOMPRESS + LZ4_EXTENSION, + TLZ4_EXTENSION, +#endif + NULL +}; + +static const char *suffixListStr = + ZSTD_EXTENSION "/" TZSTD_EXTENSION +#ifdef ZSTD_GZDECOMPRESS + "/" GZ_EXTENSION "/" TGZ_EXTENSION +#endif +#ifdef ZSTD_LZMADECOMPRESS + "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION +#endif +#ifdef ZSTD_LZ4DECOMPRESS + "/" LZ4_EXTENSION "/" TLZ4_EXTENSION +#endif +; + +/* FIO_determineDstName() : + * create a destination filename from a srcFileName. + * @return a pointer to it. + * @return == NULL if there is an error */ +static const char* +FIO_determineDstName(const char* srcFileName, const char* outDirName) +{ + static size_t dfnbCapacity = 0; + static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */ + size_t dstFileNameEndPos; + char* outDirFilename = NULL; + const char* dstSuffix = ""; + size_t dstSuffixLen = 0; + + size_t sfnSize = strlen(srcFileName); + + size_t srcSuffixLen; + const char* const srcSuffix = strrchr(srcFileName, '.'); + if (srcSuffix == NULL) { + DISPLAYLEVEL(1, + "zstd: %s: unknown suffix (%s expected). 
" + "Can't derive the output file name. " + "Specify it with -o dstFileName. Ignoring.\n", + srcFileName, suffixListStr); + return NULL; + } + srcSuffixLen = strlen(srcSuffix); + + { + const char** matchedSuffixPtr; + for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) { + if (!strcmp(*matchedSuffixPtr, srcSuffix)) { + break; + } + } + + /* check suffix is authorized */ + if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) { + DISPLAYLEVEL(1, + "zstd: %s: unknown suffix (%s expected). " + "Can't derive the output file name. " + "Specify it with -o dstFileName. Ignoring.\n", + srcFileName, suffixListStr); + return NULL; + } + + if ((*matchedSuffixPtr)[1] == 't') { + dstSuffix = ".tar"; + dstSuffixLen = strlen(dstSuffix); + } + } + + if (outDirName) { + outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0); + sfnSize = strlen(outDirFilename); + assert(outDirFilename != NULL); + } + + if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) { + /* allocate enough space to write dstFilename into it */ + free(dstFileNameBuffer); + dfnbCapacity = sfnSize + 20; + dstFileNameBuffer = (char*)malloc(dfnbCapacity); + if (dstFileNameBuffer==NULL) + EXM_THROW(74, "%s : not enough memory for dstFileName", + strerror(errno)); + } + + /* return dst name == src name truncated from suffix */ + assert(dstFileNameBuffer != NULL); + dstFileNameEndPos = sfnSize - srcSuffixLen; + if (outDirFilename) { + memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos); + free(outDirFilename); + } else { + memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos); + } + + /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar" + * extension on decompression. Also writes terminating null. 
*/ + strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix); + return dstFileNameBuffer; + + /* note : dstFileNameBuffer memory is not going to be free */ +} + +int +FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + const char** srcNamesTable, + const char* outMirroredRootDirName, + const char* outDirName, const char* outFileName, + const char* dictFileName) +{ + int status; + int error = 0; + dRess_t ress = FIO_createDResources(prefs, dictFileName); + + if (outFileName) { + if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) { + FIO_freeDResources(ress); + return 1; + } + if (!prefs->testMode) { + ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS); + if (ress.dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName); + } + for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { + status = FIO_decompressSrcFile(fCtx, prefs, ress, outFileName, srcNamesTable[fCtx->currFileIdx]); + if (!status) fCtx->nbFilesProcessed++; + error |= status; + } + if ((!prefs->testMode) && (fclose(ress.dstFile))) + EXM_THROW(72, "Write error : %s : cannot properly close output file", + strerror(errno)); + } else { + if (outMirroredRootDirName) + UTIL_mirrorSourceFilesDirectories(srcNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName); + + for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { /* create dstFileName */ + const char* const srcFileName = srcNamesTable[fCtx->currFileIdx]; + const char* dstFileName = NULL; + if (outMirroredRootDirName) { + char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName); + if (validMirroredDirName) { + dstFileName = FIO_determineDstName(srcFileName, validMirroredDirName); + free(validMirroredDirName); + } else { + DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot decompress '%s' into '%s'\n", srcFileName, outMirroredRootDirName); + } + } else { + dstFileName = 
FIO_determineDstName(srcFileName, outDirName); + } + if (dstFileName == NULL) { error=1; continue; } + status = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName); + if (!status) fCtx->nbFilesProcessed++; + error |= status; + } + if (outDirName) + FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal); + } + + if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesOutput != 0) + DISPLAYLEVEL(2, "%d files decompressed : %6zu bytes total \n", fCtx->nbFilesProcessed, fCtx->totalBytesOutput); + + FIO_freeDResources(ress); + return error; +} + +/* ************************************************************************** + * .zst file info (--list command) + ***************************************************************************/ + +typedef struct { + U64 decompressedSize; + U64 compressedSize; + U64 windowSize; + int numActualFrames; + int numSkippableFrames; + int decompUnavailable; + int usesCheck; + U32 nbFiles; +} fileInfo_t; + +typedef enum { + info_success=0, + info_frame_error=1, + info_not_zstd=2, + info_file_error=3, + info_truncated_input=4, +} InfoError; + +#define ERROR_IF(c,n,...) 
{ \ + if (c) { \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, " \n"); \ + return n; \ + } \ +} + +static InfoError +FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile) +{ + /* begin analyzing frame */ + for ( ; ; ) { + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile); + if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN(ZSTD_f_zstd1)) { + if ( feof(srcFile) + && (numBytesRead == 0) + && (info->compressedSize > 0) + && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) { + unsigned long long file_position = (unsigned long long) LONG_TELL(srcFile); + unsigned long long file_size = (unsigned long long) info->compressedSize; + ERROR_IF(file_position != file_size, info_truncated_input, + "Error: seeked to position %llu, which is beyond file size of %llu\n", + file_position, + file_size); + break; /* correct end of file => success */ + } + ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame"); + ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames"); + } + { U32 const magicNumber = MEM_readLE32(headerBuffer); + /* Zstandard frame */ + if (magicNumber == ZSTD_MAGICNUMBER) { + ZSTD_frameHeader header; + U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead); + if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR + || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) { + info->decompUnavailable = 1; + } else { + info->decompressedSize += frameContentSize; + } + ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0, + info_frame_error, "Error: could not decode frame header"); + info->windowSize = header.windowSize; + /* move to the end of the frame header */ + { size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead); + ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size"); + ERROR_IF(fseek(srcFile, 
((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0, + info_frame_error, "Error: could not move to end of frame header"); + } + + /* skip all blocks in the frame */ + { int lastBlock = 0; + do { + BYTE blockHeaderBuffer[3]; + ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3, + info_frame_error, "Error while reading block header"); + { U32 const blockHeader = MEM_readLE24(blockHeaderBuffer); + U32 const blockTypeID = (blockHeader >> 1) & 3; + U32 const isRLE = (blockTypeID == 1); + U32 const isWrongBlock = (blockTypeID == 3); + long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3); + ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type"); + lastBlock = blockHeader & 1; + ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0, + info_frame_error, "Error: could not skip to end of block"); + } + } while (lastBlock != 1); + } + + /* check if checksum is used */ + { BYTE const frameHeaderDescriptor = headerBuffer[4]; + int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2; + if (contentChecksumFlag) { + info->usesCheck = 1; + ERROR_IF(fseek(srcFile, 4, SEEK_CUR) != 0, + info_frame_error, "Error: could not skip past checksum"); + } } + info->numActualFrames++; + } + /* Skippable frame */ + else if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + U32 const frameSize = MEM_readLE32(headerBuffer + 4); + long const seek = (long)(8 + frameSize - numBytesRead); + ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0, + info_frame_error, "Error: could not find end of skippable frame"); + info->numSkippableFrames++; + } + /* unknown content */ + else { + return info_not_zstd; + } + } /* magic number analysis */ + } /* end analyzing frames */ + return info_success; +} + + +static InfoError +getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName) +{ + InfoError status; + FILE* const srcFile = FIO_openSrcFile(NULL, inFileName); + ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source 
file %s", inFileName); + + info->compressedSize = UTIL_getFileSize(inFileName); + status = FIO_analyzeFrames(info, srcFile); + + fclose(srcFile); + info->nbFiles = 1; + return status; +} + + +/** getFileInfo() : + * Reads information from file, stores in *info + * @return : InfoError status + */ +static InfoError +getFileInfo(fileInfo_t* info, const char* srcFileName) +{ + ERROR_IF(!UTIL_isRegularFile(srcFileName), + info_file_error, "Error : %s is not a file", srcFileName); + return getFileInfo_fileConfirmed(info, srcFileName); +} + + +static void +displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel) +{ + UTIL_HumanReadableSize_t const window_hrs = UTIL_makeHumanReadableSize(info->windowSize); + UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(info->compressedSize); + UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(info->decompressedSize); + double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/(double)info->compressedSize; + const char* const checkString = (info->usesCheck ? 
"XXH64" : "None"); + if (displayLevel <= 2) { + if (!info->decompUnavailable) { + DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %s\n", + info->numSkippableFrames + info->numActualFrames, + info->numSkippableFrames, + compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix, + decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix, + ratio, checkString, inFileName); + } else { + DISPLAYOUT("%6d %5d %6.*f%4s %5s %s\n", + info->numSkippableFrames + info->numActualFrames, + info->numSkippableFrames, + compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix, + checkString, inFileName); + } + } else { + DISPLAYOUT("%s \n", inFileName); + DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames); + if (info->numSkippableFrames) + DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames); + DISPLAYOUT("Window Size: %.*f%s (%llu B)\n", + window_hrs.precision, window_hrs.value, window_hrs.suffix, + (unsigned long long)info->windowSize); + DISPLAYOUT("Compressed Size: %.*f%s (%llu B)\n", + compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix, + (unsigned long long)info->compressedSize); + if (!info->decompUnavailable) { + DISPLAYOUT("Decompressed Size: %.*f%s (%llu B)\n", + decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix, + (unsigned long long)info->decompressedSize); + DISPLAYOUT("Ratio: %.4f\n", ratio); + } + DISPLAYOUT("Check: %s\n", checkString); + DISPLAYOUT("\n"); + } +} + +static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2) +{ + fileInfo_t total; + memset(&total, 0, sizeof(total)); + total.numActualFrames = fi1.numActualFrames + fi2.numActualFrames; + total.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames; + total.compressedSize = fi1.compressedSize + fi2.compressedSize; + total.decompressedSize = fi1.decompressedSize + fi2.decompressedSize; + total.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable; + total.usesCheck = 
fi1.usesCheck & fi2.usesCheck; + total.nbFiles = fi1.nbFiles + fi2.nbFiles; + return total; +} + +static int +FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel) +{ + fileInfo_t info; + memset(&info, 0, sizeof(info)); + { InfoError const error = getFileInfo(&info, inFileName); + switch (error) { + case info_frame_error: + /* display error, but provide output */ + DISPLAYLEVEL(1, "Error while parsing \"%s\" \n", inFileName); + break; + case info_not_zstd: + DISPLAYOUT("File \"%s\" not compressed by zstd \n", inFileName); + if (displayLevel > 2) DISPLAYOUT("\n"); + return 1; + case info_file_error: + /* error occurred while opening the file */ + if (displayLevel > 2) DISPLAYOUT("\n"); + return 1; + case info_truncated_input: + DISPLAYOUT("File \"%s\" is truncated \n", inFileName); + if (displayLevel > 2) DISPLAYOUT("\n"); + return 1; + case info_success: + default: + break; + } + + displayInfo(inFileName, &info, displayLevel); + *total = FIO_addFInfo(*total, info); + assert(error == info_success || error == info_frame_error); + return (int)error; + } +} + +int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel) +{ + /* ensure no specified input is stdin (needs fseek() capability) */ + { unsigned u; + for (u=0; u<numFiles;u++) { + ERROR_IF(!strcmp (filenameTable[u], stdinmark), + 1, "zstd: --list does not support reading from standard input"); + } } + + if (numFiles == 0) { + if (!IS_CONSOLE(stdin)) { + DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n"); + } + DISPLAYLEVEL(1, "No files given \n"); + return 1; + } + + if (displayLevel <= 2) { + DISPLAYOUT("Frames Skips Compressed Uncompressed Ratio Check Filename\n"); + } + { int error = 0; + fileInfo_t total; + memset(&total, 0, sizeof(total)); + total.usesCheck = 1; + /* --list each file, and check for any error */ + { unsigned u; + for (u=0; u<numFiles;u++) { + error |= FIO_listFile(&total, filenameTable[u], displayLevel); + } } + 
if (numFiles > 1 && displayLevel <= 2) { /* display total */ + UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(total.compressedSize); + UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(total.decompressedSize); + double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/(double)total.compressedSize; + const char* const checkString = (total.usesCheck ? "XXH64" : ""); + DISPLAYOUT("----------------------------------------------------------------- \n"); + if (total.decompUnavailable) { + DISPLAYOUT("%6d %5d %6.*f%4s %5s %u files\n", + total.numSkippableFrames + total.numActualFrames, + total.numSkippableFrames, + compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix, + checkString, (unsigned)total.nbFiles); + } else { + DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %u files\n", + total.numSkippableFrames + total.numActualFrames, + total.numSkippableFrames, + compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix, + decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix, + ratio, checkString, (unsigned)total.nbFiles); + } } + return error; + } +} + + +#endif /* #ifndef ZSTD_NODECOMPRESS */ diff --git a/contrib/libs/zstd/programs/fileio.h b/contrib/libs/zstd/programs/fileio.h new file mode 100644 index 0000000000..61094db83c --- /dev/null +++ b/contrib/libs/zstd/programs/fileio.h @@ -0,0 +1,180 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +#ifndef FILEIO_H_23981798732 +#define FILEIO_H_23981798732 + +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */ +#include "../lib/zstd.h" /* ZSTD_* */ + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* ************************************* +* Special i/o constants +**************************************/ +#define stdinmark "/*stdin*\\" +#define stdoutmark "/*stdout*\\" +#ifdef _WIN32 +# define nulmark "NUL" +#else +# define nulmark "/dev/null" +#endif + +/** + * We test whether the extension we found starts with 't', and if so, we append + * ".tar" to the end of the output name. + */ +#define LZMA_EXTENSION ".lzma" +#define XZ_EXTENSION ".xz" +#define TXZ_EXTENSION ".txz" + +#define GZ_EXTENSION ".gz" +#define TGZ_EXTENSION ".tgz" + +#define ZSTD_EXTENSION ".zst" +#define TZSTD_EXTENSION ".tzst" +#define ZSTD_ALT_EXTENSION ".zstd" /* allow decompression of .zstd files */ + +#define LZ4_EXTENSION ".lz4" +#define TLZ4_EXTENSION ".tlz4" + + +/*-************************************* +* Types +***************************************/ +typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_lzmaCompression, FIO_lz4Compression } FIO_compressionType_t; + +typedef struct FIO_prefs_s FIO_prefs_t; + +FIO_prefs_t* FIO_createPreferences(void); +void FIO_freePreferences(FIO_prefs_t* const prefs); + +/* Mutable struct containing relevant context and state regarding (de)compression with respect to file I/O */ +typedef struct FIO_ctx_s FIO_ctx_t; + +FIO_ctx_t* FIO_createContext(void); +void FIO_freeContext(FIO_ctx_t* const fCtx); + +typedef struct FIO_display_prefs_s FIO_display_prefs_t; + +typedef enum { FIO_ps_auto, FIO_ps_never, FIO_ps_always } FIO_progressSetting_e; + +/*-************************************* +* Parameters +***************************************/ +/* FIO_prefs_t functions */ +void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType); +void 
FIO_overwriteMode(FIO_prefs_t* const prefs); +void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt); +void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel); +void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel); +void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder); +void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize); +void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag); +void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag); +void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog); +void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag); +void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog); +void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog); +void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch); +void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit); +void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers); +void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog); +void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag); +void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse); /**< 0: no sparse; 1: disable on stdout; 2: always enabled */ +void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable); +void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize); +void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize); +void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint); +void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode); +void FIO_setLiteralCompressionMode( + FIO_prefs_t* const prefs, + ZSTD_paramSwitch_e mode); + +void FIO_setProgressSetting(FIO_progressSetting_e progressSetting); +void FIO_setNotificationLevel(int level); +void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles); +void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int 
allowBlockDevices); +void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value); +void FIO_setContentSize(FIO_prefs_t* const prefs, int value); +void FIO_displayCompressionParameters(const FIO_prefs_t* prefs); + +/* FIO_ctx_t functions */ +void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value); +void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value); +void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames); + +/*-************************************* +* Single File functions +***************************************/ +/** FIO_compressFilename() : + * @return : 0 == ok; 1 == pb with src file. */ +int FIO_compressFilename (FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, + const char* outfilename, const char* infilename, + const char* dictFileName, int compressionLevel, + ZSTD_compressionParameters comprParams); + +/** FIO_decompressFilename() : + * @return : 0 == ok; 1 == pb with src file. */ +int FIO_decompressFilename (FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, + const char* outfilename, const char* infilename, const char* dictFileName); + +/** FIO_listMultipleFiles() : + * --list command : displays frame information for each of the `numFiles` entries of `filenameTable`. + * Reading from standard input is not supported. + * @return : 0 == ok; non-zero if no file was given, or if any file could not be listed. */ +int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel); + + +/*-************************************* +* Multiple File functions +***************************************/ +/** FIO_compressMultipleFilenames() : + * @return : nb of missing files */ +int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + const char** inFileNamesTable, + const char* outMirroredDirName, + const char* outDirName, + const char* outFileName, const char* suffix, + const char* dictFileName, int compressionLevel, + ZSTD_compressionParameters comprParams); + +/** FIO_decompressMultipleFilenames() : + * @return : 0 if every file was decompressed successfully, + * non-zero if at least one file was missing, skipped or failed + * (per-file statuses are OR-ed together, this is not a count) */ +int FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx, + FIO_prefs_t* const prefs, + const char** srcNamesTable, + const char* outMirroredDirName, + const char* outDirName, + const 
char* outFileName, + const char* dictFileName); + +/* FIO_checkFilenameCollisions() : + * Checks for and warns if there are any files that would have the same output path + */ +int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles); + + + +/*-************************************* +* Advanced stuff (should actually be hosted elsewhere) +***************************************/ + +/* custom crash signal handler */ +void FIO_addAbortHandler(void); + + + +#if defined (__cplusplus) +} +#endif + +#endif /* FILEIO_H_23981798732 */ diff --git a/contrib/libs/zstd/programs/platform.h b/contrib/libs/zstd/programs/platform.h new file mode 100644 index 0000000000..b858e3b484 --- /dev/null +++ b/contrib/libs/zstd/programs/platform.h @@ -0,0 +1,215 @@ +/* + * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef PLATFORM_H_MODULE +#define PLATFORM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + + +/* ************************************** +* Compiler Options +****************************************/ +#if defined(_MSC_VER) +# define _CRT_SECURE_NO_WARNINGS /* Disable Visual Studio warning messages for fopen, strncpy, strerror */ +# define _CRT_NONSTDC_NO_WARNINGS /* Disable C4996 complaining about posix function names */ +# if (_MSC_VER <= 1800) /* 1800 == Visual Studio 2013 */ +# define _CRT_SECURE_NO_DEPRECATE /* VS2005 - must be declared before <io.h> and <windows.h> */ +# define snprintf sprintf_s /* snprintf unsupported by Visual <= 2013 */ +# endif +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/* ************************************** +* Detect 64-bit OS +* http://nadeausoftware.com/articles/2012/02/c_c_tip_how_detect_processor_type_using_compiler_predefined_macros +* Defines __64BIT__ (to 1) when any of the tests below matches and it is not already defined. +****************************************/ +#if defined __ia64 || defined _M_IA64 /* Intel Itanium */ \ + || defined __powerpc64__ || defined __ppc64__ || defined __PPC64__ /* POWER 64-bit */ \ + || (defined __sparc && (defined __sparcv9 || defined __sparc_v9__ || defined __arch64__)) || defined __sparc64__ /* SPARC 64-bit */ \ + || defined __x86_64__ || defined _M_X64 /* x86 64-bit */ \ + || defined __arm64__ || defined __aarch64__ || defined __ARM64_ARCH_8__ /* ARM 64-bit */ \ + || (defined __mips && (__mips == 64 || __mips == 4 || __mips == 3)) /* MIPS 64-bit */ \ + || defined _LP64 || defined __LP64__ /* NetBSD, OpenBSD */ || defined __64BIT__ /* AIX */ || defined _ADDR64 /* Cray */ \ + || (defined __SIZEOF_POINTER__ && __SIZEOF_POINTER__ == 8) /* gcc */ +# if !defined(__64BIT__) +# define __64BIT__ 1 +# endif +#endif + + +/* ********************************************************* +* Turn on Large Files support (>4GB) for 32-bit Linux/Unix +***********************************************************/ +#if 
!defined(__64BIT__) || defined(__MINGW32__) /* No point defining Large file for 64 bit but MinGW-w64 requires it */ +# if !defined(_FILE_OFFSET_BITS) +# define _FILE_OFFSET_BITS 64 /* turn off_t into a 64-bit type for ftello, fseeko */ +# endif +# if !defined(_LARGEFILE_SOURCE) /* obsolete macro, replaced with _FILE_OFFSET_BITS */ +# define _LARGEFILE_SOURCE 1 /* Large File Support extension (LFS) - fseeko, ftello */ +# endif +# if defined(_AIX) || defined(__hpux) +# define _LARGE_FILES /* Large file support on 32-bits AIX and HP-UX */ +# endif +#endif + + +/* ************************************************************ +* Detect POSIX version +* PLATFORM_POSIX_VERSION = 0 for non-Unix e.g. Windows +* PLATFORM_POSIX_VERSION = 1 for Unix-like but non-POSIX +* PLATFORM_POSIX_VERSION > 1 is equal to found _POSIX_VERSION +* Value of PLATFORM_POSIX_VERSION can be forced on command line +***************************************************************/ +#ifndef PLATFORM_POSIX_VERSION + +# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \ + || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */ + /* exception rule : force posix version to 200112L, + * note: it's better to use unistd.h's _POSIX_VERSION whenever possible */ +# define PLATFORM_POSIX_VERSION 200112L + +/* try to determine posix version through official unistd.h's _POSIX_VERSION (http://pubs.opengroup.org/onlinepubs/7908799/xsh/unistd.h.html). + * note : there is no simple way to know in advance if <unistd.h> is present or not on target system, + * Posix specification mandates its presence and its content, but target system must respect this spec. + * It's necessary to _not_ #include <unistd.h> whenever target OS is not unix-like + * otherwise it will block preprocessing stage. 
+ * The following list of build macros tries to "guess" if target OS is likely unix-like, and therefore can #include <unistd.h> + */ +# elif !defined(_WIN32) \ + && ( defined(__unix__) || defined(__unix) \ + || defined(__midipix__) || defined(__VMS) || defined(__HAIKU__) ) + +# if defined(__linux__) || defined(__linux) || defined(__CYGWIN__) +# ifndef _POSIX_C_SOURCE +# define _POSIX_C_SOURCE 200809L /* feature test macro : https://www.gnu.org/software/libc/manual/html_node/Feature-Test-Macros.html */ +# endif +# endif +# include <unistd.h> /* declares _POSIX_VERSION */ +# if defined(_POSIX_VERSION) /* POSIX compliant */ +# define PLATFORM_POSIX_VERSION _POSIX_VERSION +# else +# define PLATFORM_POSIX_VERSION 1 +# endif + +# ifdef __UCLIBC__ +# ifndef __USE_MISC +# define __USE_MISC /* enable st_mtim on uclibc */ +# endif +# endif + +# else /* non-unix target platform (like Windows) */ +# define PLATFORM_POSIX_VERSION 0 +# endif + +#endif /* PLATFORM_POSIX_VERSION */ + + +#if PLATFORM_POSIX_VERSION > 1 + /* glibc < 2.26 may not expose struct timespec def without this. + * See issue #1920. 
*/ +# ifndef _ATFILE_SOURCE +# define _ATFILE_SOURCE +# endif +#endif + + +/*-********************************************* +* Detect if isatty() and fileno() are available +************************************************/ +#if (defined(__linux__) && (PLATFORM_POSIX_VERSION > 1)) \ + || (PLATFORM_POSIX_VERSION >= 200112L) \ + || defined(__DJGPP__) +# include <unistd.h> /* isatty */ +# include <stdio.h> /* fileno */ +# define IS_CONSOLE(stdStream) isatty(fileno(stdStream)) +#elif defined(MSDOS) || defined(OS2) +# include <io.h> /* _isatty */ +# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) +#elif defined(WIN32) || defined(_WIN32) +# include <io.h> /* _isatty */ +# include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */ +# include <stdio.h> /* FILE */ +static __inline int IS_CONSOLE(FILE* stdStream) { + DWORD dummy; + return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy); +} +#else +# define IS_CONSOLE(stdStream) 0 +#endif + + +/****************************** +* OS-specific IO behaviors +******************************/ +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) +# include <fcntl.h> /* _O_BINARY */ +# include <io.h> /* _setmode, _fileno, _get_osfhandle */ +# if !defined(__DJGPP__) +# include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */ +# include <winioctl.h> /* FSCTL_SET_SPARSE */ +# define SET_BINARY_MODE(file) { int const unused=_setmode(_fileno(file), _O_BINARY); (void)unused; } +# define SET_SPARSE_FILE_MODE(file) { DWORD dw; DeviceIoControl((HANDLE) _get_osfhandle(_fileno(file)), FSCTL_SET_SPARSE, 0, 0, 0, 0, &dw, 0); } +# else +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +# define SET_SPARSE_FILE_MODE(file) +# endif +#else +# define SET_BINARY_MODE(file) +# define SET_SPARSE_FILE_MODE(file) +#endif + + +#ifndef ZSTD_SPARSE_DEFAULT +# if (defined(__APPLE__) && defined(__MACH__)) +# define ZSTD_SPARSE_DEFAULT 0 +# else +# 
define ZSTD_SPARSE_DEFAULT 1 +# endif +#endif + + +#ifndef ZSTD_START_SYMBOLLIST_FRAME +# ifdef __linux__ +# define ZSTD_START_SYMBOLLIST_FRAME 2 +# elif defined __APPLE__ +# define ZSTD_START_SYMBOLLIST_FRAME 4 +# else +# define ZSTD_START_SYMBOLLIST_FRAME 0 +# endif +#endif + + +#ifndef ZSTD_SETPRIORITY_SUPPORT + /* mandates presence of <sys/resource.h> and support for setpriority() : http://man7.org/linux/man-pages/man2/setpriority.2.html */ +# define ZSTD_SETPRIORITY_SUPPORT (PLATFORM_POSIX_VERSION >= 200112L) +#endif + + +#ifndef ZSTD_NANOSLEEP_SUPPORT + /* mandates support of nanosleep() within <time.h> : http://man7.org/linux/man-pages/man2/nanosleep.2.html */ +# if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 199309L)) \ + || (PLATFORM_POSIX_VERSION >= 200112L) +# define ZSTD_NANOSLEEP_SUPPORT 1 +# else +# define ZSTD_NANOSLEEP_SUPPORT 0 +# endif +#endif + + +#if defined (__cplusplus) +} +#endif + +#endif /* PLATFORM_H_MODULE */ diff --git a/contrib/libs/zstd/programs/timefn.c b/contrib/libs/zstd/programs/timefn.c new file mode 100644 index 0000000000..64577b0e93 --- /dev/null +++ b/contrib/libs/zstd/programs/timefn.c @@ -0,0 +1,169 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/* === Dependencies === */ + +#include "timefn.h" + + +/*-**************************************** +* Time functions +******************************************/ + +#if defined(_WIN32) /* Windows */ + +#include <stdlib.h> /* abort */ +#include <stdio.h> /* perror */ + +UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; } + +PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) +{ + static LARGE_INTEGER ticksPerSecond; + static int init = 0; + if (!init) { + if (!QueryPerformanceFrequency(&ticksPerSecond)) { + perror("timefn::QueryPerformanceFrequency"); + abort(); + } + init = 1; + } + return 1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; +} + +PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) +{ + static LARGE_INTEGER ticksPerSecond; + static int init = 0; + if (!init) { + if (!QueryPerformanceFrequency(&ticksPerSecond)) { + perror("timefn::QueryPerformanceFrequency"); + abort(); + } + init = 1; + } + return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; +} + + + +#elif defined(__APPLE__) && defined(__MACH__) + +UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); } + +PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) +{ + static mach_timebase_info_data_t rate; + static int init = 0; + if (!init) { + mach_timebase_info(&rate); + init = 1; + } + return (((clockEnd - clockStart) * (PTime)rate.numer) / ((PTime)rate.denom))/1000ULL; +} + +PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) +{ + static mach_timebase_info_data_t rate; + static int init = 0; + if (!init) { + mach_timebase_info(&rate); + init = 1; + } + return ((clockEnd - clockStart) * (PTime)rate.numer) / ((PTime)rate.denom); +} + + +/* C11 requires timespec_get, but FreeBSD 11 lacks it, while still claiming C11 compliance. + Android also lacks it but does define TIME_UTC. 
*/ +#elif (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */) \ + && defined(TIME_UTC) && !defined(__ANDROID__) + +#include <stdlib.h> /* abort */ +#include <stdio.h> /* perror */ + +/* UTIL_getTime() : + * reads the current time through timespec_get(TIME_UTC). + * On failure, reports the error with perror() and abort()s. */ +UTIL_time_t UTIL_getTime(void) +{ + /* time must be initialized, otherwise it may fail msan test. + * No good reason, likely a limitation of timespec_get() for some target */ + UTIL_time_t time = UTIL_TIME_INITIALIZER; + if (timespec_get(&time, TIME_UTC) != TIME_UTC) { + perror("timefn::timespec_get"); + abort(); + } + return time; +} + +/* UTIL_getSpanTime() : + * @return : (end - begin), expressed as seconds + nanoseconds. + * When end.tv_nsec < begin.tv_nsec, one second is borrowed + * so that the nanosecond field does not go negative. */ +static UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end) +{ + UTIL_time_t diff; + if (end.tv_nsec < begin.tv_nsec) { + diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec; + diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec; + } else { + diff.tv_sec = end.tv_sec - begin.tv_sec; + diff.tv_nsec = end.tv_nsec - begin.tv_nsec; + } + return diff; +} + +/* @return : span between begin and end, in microseconds + * (the sub-microsecond remainder is truncated by the integer division) */ +PTime UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end) +{ + UTIL_time_t const diff = UTIL_getSpanTime(begin, end); + PTime micro = 0; + micro += 1000000ULL * diff.tv_sec; + micro += diff.tv_nsec / 1000ULL; + return micro; +} + +/* @return : span between begin and end, in nanoseconds */ +PTime UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end) +{ + UTIL_time_t const diff = UTIL_getSpanTime(begin, end); + PTime nano = 0; + nano += 1000000000ULL * diff.tv_sec; + nano += diff.tv_nsec; + return nano; +} + + + +#else /* relies on standard C90 (note : clock_t measurements can be wrong when using multi-threading) */ + +UTIL_time_t UTIL_getTime(void) { return clock(); } +PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } +PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } + +#endif + + + +/* returns time span in microseconds */ +PTime UTIL_clockSpanMicro(UTIL_time_t clockStart ) +{ + UTIL_time_t const clockEnd = UTIL_getTime(); + return
UTIL_getSpanTimeMicro(clockStart, clockEnd); +} + +/* returns time span in nanoseconds, measured from clockStart to the moment of the call */ +PTime UTIL_clockSpanNano(UTIL_time_t clockStart ) +{ + UTIL_time_t const clockEnd = UTIL_getTime(); + return UTIL_getSpanTimeNano(clockStart, clockEnd); +} + +/* spins on UTIL_getTime() until the span measured since entry becomes non-zero, + * i.e. until the clock reading changes */ +void UTIL_waitForNextTick(void) +{ + UTIL_time_t const clockStart = UTIL_getTime(); + UTIL_time_t clockEnd; + do { + clockEnd = UTIL_getTime(); + } while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0); +} diff --git a/contrib/libs/zstd/programs/timefn.h b/contrib/libs/zstd/programs/timefn.h new file mode 100644 index 0000000000..3fcd78a28e --- /dev/null +++ b/contrib/libs/zstd/programs/timefn.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses.
+ */ + +#ifndef TIME_FN_H_MODULE_287987 +#define TIME_FN_H_MODULE_287987 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/*-**************************************** +* Dependencies +******************************************/ +#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */ + + + +/*-**************************************** +* Local Types +******************************************/ + +#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# if defined(_AIX) +# include <inttypes.h> +# else +# include <stdint.h> /* intptr_t */ +# endif + typedef uint64_t PTime; /* Precise Time */ +#else + typedef unsigned long long PTime; /* does not support compilers without long long support */ +#endif + + + +/*-**************************************** +* Time functions +******************************************/ +#if defined(_WIN32) /* Windows */ + + #include <windows.h> /* LARGE_INTEGER */ + typedef LARGE_INTEGER UTIL_time_t; + #define UTIL_TIME_INITIALIZER { { 0, 0 } } + +#elif defined(__APPLE__) && defined(__MACH__) + + #include <mach/mach_time.h> + typedef PTime UTIL_time_t; + #define UTIL_TIME_INITIALIZER 0 + +/* C11 requires timespec_get, but FreeBSD 11 lacks it, while still claiming C11 compliance. + Android also lacks it but does define TIME_UTC. 
*/ +#elif (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */) \ + && defined(TIME_UTC) && !defined(__ANDROID__) + + typedef struct timespec UTIL_time_t; + #define UTIL_TIME_INITIALIZER { 0, 0 } + +#else /* relies on standard C90 (note : clock_t measurements can be wrong when using multi-threading) */ + + typedef clock_t UTIL_time_t; + #define UTIL_TIME_INITIALIZER 0 + +#endif + + +UTIL_time_t UTIL_getTime(void); +PTime UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd); +PTime UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd); + +#define SEC_TO_MICRO ((PTime)1000000) +PTime UTIL_clockSpanMicro(UTIL_time_t clockStart); +PTime UTIL_clockSpanNano(UTIL_time_t clockStart); + +void UTIL_waitForNextTick(void); + + +#if defined (__cplusplus) +} +#endif + +#endif /* TIME_FN_H_MODULE_287987 */ diff --git a/contrib/libs/zstd/programs/util.c b/contrib/libs/zstd/programs/util.c new file mode 100644 index 0000000000..d69b72a37c --- /dev/null +++ b/contrib/libs/zstd/programs/util.c @@ -0,0 +1,1399 @@ +/* + * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#if defined (__cplusplus) +extern "C" { +#endif + + +/*-**************************************** +* Dependencies +******************************************/ +#include "util.h" /* note : ensure that platform.h is included first ! 
*/ +#include <stdlib.h> /* malloc, realloc, free */ +#include <stdio.h> /* fprintf */ +#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC, nanosleep */ +#include <errno.h> +#include <assert.h> + +#if defined(_WIN32) +# include <sys/utime.h> /* utime */ +# include <io.h> /* _chmod */ +#else +# include <unistd.h> /* chown, stat */ +# if PLATFORM_POSIX_VERSION < 200809L || !defined(st_mtime) +# include <utime.h> /* utime */ +# else +# include <fcntl.h> /* AT_FDCWD */ +# include <sys/stat.h> /* utimensat */ +# endif +#endif + +#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) +#include <direct.h> /* needed for _mkdir in windows */ +#endif + +#if defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */ +# include <dirent.h> /* opendir, readdir */ +# include <string.h> /* strerror, memcpy */ +#endif /* #ifdef _WIN32 */ + +/*-**************************************** +* Internal Macros +******************************************/ + +/* CONTROL is almost like an assert(), but is never disabled. + * It's designed for failures that may happen rarely, + * but we don't want to maintain a specific error code path for them, + * such as a malloc() returning NULL for example. + * Since it's always active, this macro can trigger side effects. + */ +#define CONTROL(c) { \ + if (!(c)) { \ + UTIL_DISPLAYLEVEL(1, "Error : %s, %i : %s", \ + __FILE__, __LINE__, #c); \ + exit(1); \ +} } + +/* console log */ +#define UTIL_DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define UTIL_DISPLAYLEVEL(l, ...) { if (g_utilDisplayLevel>=l) { UTIL_DISPLAY(__VA_ARGS__); } } + +/* A modified version of realloc(). + * If UTIL_realloc() fails the original block is freed. 
+ */ +UTIL_STATIC void* UTIL_realloc(void *ptr, size_t size) +{ + void *newptr = realloc(ptr, size); + if (newptr) return newptr; + free(ptr); + return NULL; +} + +#if defined(_MSC_VER) + #define chmod _chmod +#endif + + +/*-**************************************** +* Console log +******************************************/ +int g_utilDisplayLevel; + +/* UTIL_requireUserConfirmation() : + * displays @prompt, then reads one character from stdin. + * @return : 0 if that character belongs to @acceptableLetters (proceed), + * 1 otherwise, after displaying @abortMsg; + * also 1, without prompting, when @hasStdinInput is set. + * The remainder of the input line is consumed before returning. */ +int UTIL_requireUserConfirmation(const char* prompt, const char* abortMsg, + const char* acceptableLetters, int hasStdinInput) { + int ch, result; + + if (hasStdinInput) { + UTIL_DISPLAY("stdin is an input - not proceeding.\n"); + return 1; + } + + UTIL_DISPLAY("%s", prompt); + ch = getchar(); + result = 0; + /* EOF is not a character, and strchr() also matches the terminating '\0' : + * neither may count as an acceptable answer */ + if (ch == EOF || ch == '\0' || strchr(acceptableLetters, ch) == NULL) { + UTIL_DISPLAY("%s", abortMsg); + result = 1; + } + /* flush the rest */ + while ((ch!=EOF) && (ch!='\n')) + ch = getchar(); + return result; +} + + +/*-************************************* +* Constants +***************************************/ +#define LIST_SIZE_INCREASE (8*1024) +/* fully parenthesized, so that the macro expands safely inside any expression */ +#define MAX_FILE_OF_FILE_NAMES_SIZE ((1<<20)*50) + + +/*-************************************* +* Functions +***************************************/ + +/* UTIL_stat() : + * wrapper around the platform's stat() variant. + * @return : 1 on success (@statbuf is filled), 0 on failure. + * Note : this is the opposite of stat()'s own return convention. */ +int UTIL_stat(const char* filename, stat_t* statbuf) +{ +#if defined(_MSC_VER) + return !_stat64(filename, statbuf); +#elif defined(__MINGW32__) && defined (__MSVCRT__) + return !_stati64(filename, statbuf); +#else + return !stat(filename, statbuf); +#endif +} + +/* @return : 1 if @infilename can be stat'ed and is a regular file, 0 otherwise */ +int UTIL_isRegularFile(const char* infilename) +{ + stat_t statbuf; + return UTIL_stat(infilename, &statbuf) && UTIL_isRegularFileStat(&statbuf); +} + +/* same test as UTIL_isRegularFile(), on an already filled @statbuf */ +int UTIL_isRegularFileStat(const stat_t* statbuf) +{ +#if defined(_MSC_VER) + return (statbuf->st_mode & S_IFREG) != 0; +#else + return S_ISREG(statbuf->st_mode) != 0; +#endif +} + +/* like chmod, but avoid changing permission of /dev/null */ +int UTIL_chmod(char const* filename, const stat_t* statbuf, mode_t permissions) +{ + stat_t localStatBuf; + if (statbuf == NULL) { + if (!UTIL_stat(filename, &localStatBuf)) return 0; + statbuf
= &localStatBuf; + } + if (!UTIL_isRegularFileStat(statbuf)) return 0; /* pretend success, but don't change anything */ + return chmod(filename, permissions); +} + +/* set access and modification times */ +int UTIL_utime(const char* filename, const stat_t *statbuf) +{ + int ret; + /* We check that st_mtime is a macro here in order to give us confidence + * that struct stat has a struct timespec st_mtim member. We need this + * check because there are some platforms that claim to be POSIX 2008 + * compliant but which do not have st_mtim... */ +#if (PLATFORM_POSIX_VERSION >= 200809L) && defined(st_mtime) + /* (atime, mtime) */ + struct timespec timebuf[2] = { {0, UTIME_NOW} }; + timebuf[1] = statbuf->st_mtim; + ret = utimensat(AT_FDCWD, filename, timebuf, 0); +#else + struct utimbuf timebuf; + timebuf.actime = time(NULL); + timebuf.modtime = statbuf->st_mtime; + ret = utime(filename, &timebuf); +#endif + errno = 0; + return ret; +} + +int UTIL_setFileStat(const char *filename, const stat_t *statbuf) +{ + int res = 0; + + stat_t curStatBuf; + if (!UTIL_stat(filename, &curStatBuf) || !UTIL_isRegularFileStat(&curStatBuf)) + return -1; + + /* set access and modification times */ + res += UTIL_utime(filename, statbuf); + +#if !defined(_WIN32) + res += chown(filename, statbuf->st_uid, statbuf->st_gid); /* Copy ownership */ +#endif + + res += UTIL_chmod(filename, &curStatBuf, statbuf->st_mode & 07777); /* Copy file permissions */ + + errno = 0; + return -res; /* number of errors is returned */ +} + +int UTIL_isDirectory(const char* infilename) +{ + stat_t statbuf; + return UTIL_stat(infilename, &statbuf) && UTIL_isDirectoryStat(&statbuf); +} + +int UTIL_isDirectoryStat(const stat_t* statbuf) +{ +#if defined(_MSC_VER) + return (statbuf->st_mode & _S_IFDIR) != 0; +#else + return S_ISDIR(statbuf->st_mode) != 0; +#endif +} + +int UTIL_compareStr(const void *p1, const void *p2) { + return strcmp(* (char * const *) p1, * (char * const *) p2); +} + +int UTIL_isSameFile(const char* 
fName1, const char* fName2) +{ + assert(fName1 != NULL); assert(fName2 != NULL); +#if defined(_MSC_VER) || defined(_WIN32) + /* note : Visual does not support file identification by inode. + * inode does not work on Windows, even with a posix layer, like msys2. + * The following work-around is limited to detecting exact name repetition only, + * aka `filename` is considered different from `subdir/../filename` */ + return !strcmp(fName1, fName2); +#else + { stat_t file1Stat; + stat_t file2Stat; + return UTIL_stat(fName1, &file1Stat) + && UTIL_stat(fName2, &file2Stat) + && (file1Stat.st_dev == file2Stat.st_dev) + && (file1Stat.st_ino == file2Stat.st_ino); + } +#endif +} + +/* UTIL_isFIFO : distinguish named pipes. + * Always returns 0 when PLATFORM_POSIX_VERSION < 200112L. */ +int UTIL_isFIFO(const char* infilename) +{ +/* macro guards, as defined in : https://linux.die.net/man/2/lstat */ +#if PLATFORM_POSIX_VERSION >= 200112L + stat_t statbuf; + if (UTIL_stat(infilename, &statbuf) && UTIL_isFIFOStat(&statbuf)) return 1; +#endif + (void)infilename; + return 0; +} + +/* UTIL_isFIFOStat : distinguish named pipes, from an already filled stat buffer. + * Always returns 0 when PLATFORM_POSIX_VERSION < 200112L. */ +int UTIL_isFIFOStat(const stat_t* statbuf) +{ +/* macro guards, as defined in : https://linux.die.net/man/2/lstat */ +#if PLATFORM_POSIX_VERSION >= 200112L + if (S_ISFIFO(statbuf->st_mode)) return 1; +#endif + (void)statbuf; + return 0; +} + +/* UTIL_isBlockDevStat : distinguish block devices, from an already filled stat buffer. + * Always returns 0 when PLATFORM_POSIX_VERSION < 200112L. */ +int UTIL_isBlockDevStat(const stat_t* statbuf) +{ +/* macro guards, as defined in : https://linux.die.net/man/2/lstat */ +#if PLATFORM_POSIX_VERSION >= 200112L + if (S_ISBLK(statbuf->st_mode)) return 1; +#endif + (void)statbuf; + return 0; +} + +/* UTIL_isLink : distinguish symbolic links. + * Uses lstat(), so the link itself is examined rather than its target. + * Always returns 0 when PLATFORM_POSIX_VERSION < 200112L. */ +int UTIL_isLink(const char* infilename) +{ +/* macro guards, as defined in : https://linux.die.net/man/2/lstat */ +#if PLATFORM_POSIX_VERSION >= 200112L + stat_t statbuf; + int const r = lstat(infilename, &statbuf); + if (!r && S_ISLNK(statbuf.st_mode)) return 1; +#endif + (void)infilename; + return 0; +} + +U64 UTIL_getFileSize(const char* infilename) +{ + stat_t statbuf; + if
(!UTIL_stat(infilename, &statbuf)) return UTIL_FILESIZE_UNKNOWN; + return UTIL_getFileSizeStat(&statbuf); +} + +U64 UTIL_getFileSizeStat(const stat_t* statbuf) +{ + if (!UTIL_isRegularFileStat(statbuf)) return UTIL_FILESIZE_UNKNOWN; +#if defined(_MSC_VER) + if (!(statbuf->st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN; +#elif defined(__MINGW32__) && defined (__MSVCRT__) + if (!(statbuf->st_mode & S_IFREG)) return UTIL_FILESIZE_UNKNOWN; +#else + if (!S_ISREG(statbuf->st_mode)) return UTIL_FILESIZE_UNKNOWN; +#endif + return (U64)statbuf->st_size; +} + +UTIL_HumanReadableSize_t UTIL_makeHumanReadableSize(U64 size) +{ + UTIL_HumanReadableSize_t hrs; + + if (g_utilDisplayLevel > 3) { + /* In verbose mode, do not scale sizes down, except in the case of + * values that exceed the integral precision of a double. */ + if (size >= (1ull << 53)) { + hrs.value = (double)size / (1ull << 20); + hrs.suffix = " MiB"; + /* At worst, a double representation of a maximal size will be + * accurate to better than tens of kilobytes. */ + hrs.precision = 2; + } else { + hrs.value = (double)size; + hrs.suffix = " B"; + hrs.precision = 0; + } + } else { + /* In regular mode, scale sizes down and use suffixes. 
*/ + if (size >= (1ull << 60)) { + hrs.value = (double)size / (1ull << 60); + hrs.suffix = " EiB"; + } else if (size >= (1ull << 50)) { + hrs.value = (double)size / (1ull << 50); + hrs.suffix = " PiB"; + } else if (size >= (1ull << 40)) { + hrs.value = (double)size / (1ull << 40); + hrs.suffix = " TiB"; + } else if (size >= (1ull << 30)) { + hrs.value = (double)size / (1ull << 30); + hrs.suffix = " GiB"; + } else if (size >= (1ull << 20)) { + hrs.value = (double)size / (1ull << 20); + hrs.suffix = " MiB"; + } else if (size >= (1ull << 10)) { + hrs.value = (double)size / (1ull << 10); + hrs.suffix = " KiB"; + } else { + hrs.value = (double)size; + hrs.suffix = " B"; + } + + if (hrs.value >= 100 || (U64)hrs.value == size) { + hrs.precision = 0; + } else if (hrs.value >= 10) { + hrs.precision = 1; + } else if (hrs.value > 1) { + hrs.precision = 2; + } else { + hrs.precision = 3; + } + } + + return hrs; +} + +U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles) +{ + U64 total = 0; + unsigned n; + for (n=0; n<nbFiles; n++) { + U64 const size = UTIL_getFileSize(fileNamesTable[n]); + if (size == UTIL_FILESIZE_UNKNOWN) return UTIL_FILESIZE_UNKNOWN; + total += size; + } + return total; +} + + +/* condition : @file must be valid, and not have reached its end. 
+ * @return : length of line written into @buf, ended with `\0` instead of '\n', + * or 0, if there is no new line */ +static size_t readLineFromFile(char* buf, size_t len, FILE* file) +{ + assert(!feof(file)); + if ( fgets(buf, (int) len, file) == NULL ) return 0; + { size_t linelen = strlen(buf); + if (strlen(buf)==0) return 0; + if (buf[linelen-1] == '\n') linelen--; + buf[linelen] = '\0'; + return linelen+1; + } +} + +/* Conditions : + * size of @inputFileName file must be < @dstCapacity + * @dst must be initialized + * @return : nb of lines + * or -1 if there's an error + */ +static int +readLinesFromFile(void* dst, size_t dstCapacity, + const char* inputFileName) +{ + int nbFiles = 0; + size_t pos = 0; + char* const buf = (char*)dst; + FILE* const inputFile = fopen(inputFileName, "r"); + + assert(dst != NULL); + + if(!inputFile) { + if (g_utilDisplayLevel >= 1) perror("zstd:util:readLinesFromFile"); + return -1; + } + + while ( !feof(inputFile) ) { + size_t const lineLength = readLineFromFile(buf+pos, dstCapacity-pos, inputFile); + if (lineLength == 0) break; + assert(pos + lineLength < dstCapacity); + pos += lineLength; + ++nbFiles; + } + + CONTROL( fclose(inputFile) == 0 ); + + return nbFiles; +} + +/*Note: buf is not freed in case function successfully created table because filesTable->fileNames[0] = buf*/ +FileNamesTable* +UTIL_createFileNamesTable_fromFileName(const char* inputFileName) +{ + size_t nbFiles = 0; + char* buf; + size_t bufSize; + size_t pos = 0; + stat_t statbuf; + + if (!UTIL_stat(inputFileName, &statbuf) || !UTIL_isRegularFileStat(&statbuf)) + return NULL; + + { U64 const inputFileSize = UTIL_getFileSizeStat(&statbuf); + if(inputFileSize > MAX_FILE_OF_FILE_NAMES_SIZE) + return NULL; + bufSize = (size_t)(inputFileSize + 1); /* (+1) to add '\0' at the end of last filename */ + } + + buf = (char*) malloc(bufSize); + CONTROL( buf != NULL ); + + { int const ret_nbFiles = readLinesFromFile(buf, bufSize, inputFileName); + + if (ret_nbFiles <= 0) 
{ + free(buf); + return NULL; + } + nbFiles = (size_t)ret_nbFiles; + } + + { const char** filenamesTable = (const char**) malloc(nbFiles * sizeof(*filenamesTable)); + CONTROL(filenamesTable != NULL); + + { size_t fnb; + for (fnb = 0, pos = 0; fnb < nbFiles; fnb++) { + filenamesTable[fnb] = buf+pos; + pos += strlen(buf+pos)+1; /* +1 for the finishing `\0` */ + } } + assert(pos <= bufSize); + + return UTIL_assembleFileNamesTable(filenamesTable, nbFiles, buf); + } +} + +static FileNamesTable* +UTIL_assembleFileNamesTable2(const char** filenames, size_t tableSize, size_t tableCapacity, char* buf) +{ + FileNamesTable* const table = (FileNamesTable*) malloc(sizeof(*table)); + CONTROL(table != NULL); + table->fileNames = filenames; + table->buf = buf; + table->tableSize = tableSize; + table->tableCapacity = tableCapacity; + return table; +} + +FileNamesTable* +UTIL_assembleFileNamesTable(const char** filenames, size_t tableSize, char* buf) +{ + return UTIL_assembleFileNamesTable2(filenames, tableSize, tableSize, buf); +} + +void UTIL_freeFileNamesTable(FileNamesTable* table) +{ + if (table==NULL) return; + free((void*)table->fileNames); + free(table->buf); + free(table); +} + +FileNamesTable* UTIL_allocateFileNamesTable(size_t tableSize) +{ + const char** const fnTable = (const char**)malloc(tableSize * sizeof(*fnTable)); + FileNamesTable* fnt; + if (fnTable==NULL) return NULL; + fnt = UTIL_assembleFileNamesTable(fnTable, tableSize, NULL); + fnt->tableSize = 0; /* the table is empty */ + return fnt; +} + +void UTIL_refFilename(FileNamesTable* fnt, const char* filename) +{ + assert(fnt->tableSize < fnt->tableCapacity); + fnt->fileNames[fnt->tableSize] = filename; + fnt->tableSize++; +} + +static size_t getTotalTableSize(FileNamesTable* table) +{ + size_t fnb = 0, totalSize = 0; + for(fnb = 0 ; fnb < table->tableSize && table->fileNames[fnb] ; ++fnb) { + totalSize += strlen(table->fileNames[fnb]) + 1; /* +1 to add '\0' at the end of each fileName */ + } + return totalSize; 
+} + +FileNamesTable* +UTIL_mergeFileNamesTable(FileNamesTable* table1, FileNamesTable* table2) +{ + unsigned newTableIdx = 0; + size_t pos = 0; + size_t newTotalTableSize; + char* buf; + + FileNamesTable* const newTable = UTIL_assembleFileNamesTable(NULL, 0, NULL); + CONTROL( newTable != NULL ); + + newTotalTableSize = getTotalTableSize(table1) + getTotalTableSize(table2); + + buf = (char*) calloc(newTotalTableSize, sizeof(*buf)); + CONTROL ( buf != NULL ); + + newTable->buf = buf; + newTable->tableSize = table1->tableSize + table2->tableSize; + newTable->fileNames = (const char **) calloc(newTable->tableSize, sizeof(*(newTable->fileNames))); + CONTROL ( newTable->fileNames != NULL ); + + { unsigned idx1; + for( idx1=0 ; (idx1 < table1->tableSize) && table1->fileNames[idx1] && (pos < newTotalTableSize); ++idx1, ++newTableIdx) { + size_t const curLen = strlen(table1->fileNames[idx1]); + memcpy(buf+pos, table1->fileNames[idx1], curLen); + assert(newTableIdx <= newTable->tableSize); + newTable->fileNames[newTableIdx] = buf+pos; + pos += curLen+1; + } } + + { unsigned idx2; + for( idx2=0 ; (idx2 < table2->tableSize) && table2->fileNames[idx2] && (pos < newTotalTableSize) ; ++idx2, ++newTableIdx) { + size_t const curLen = strlen(table2->fileNames[idx2]); + memcpy(buf+pos, table2->fileNames[idx2], curLen); + assert(newTableIdx <= newTable->tableSize); + newTable->fileNames[newTableIdx] = buf+pos; + pos += curLen+1; + } } + assert(pos <= newTotalTableSize); + newTable->tableSize = newTableIdx; + + UTIL_freeFileNamesTable(table1); + UTIL_freeFileNamesTable(table2); + + return newTable; +} + +#ifdef _WIN32 +static int UTIL_prepareFileList(const char* dirName, + char** bufStart, size_t* pos, + char** bufEnd, int followLinks) +{ + char* path; + size_t dirLength, pathLength; + int nbFiles = 0; + WIN32_FIND_DATAA cFile; + HANDLE hFile; + + dirLength = strlen(dirName); + path = (char*) malloc(dirLength + 3); + if (!path) return 0; + + memcpy(path, dirName, dirLength); + 
path[dirLength] = '\\'; + path[dirLength+1] = '*'; + path[dirLength+2] = 0; + + hFile=FindFirstFileA(path, &cFile); + if (hFile == INVALID_HANDLE_VALUE) { + UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s'\n", dirName); + free(path); /* the search pattern is no longer needed : do not leak it on failure */ + return 0; + } + free(path); + + /* one iteration per directory entry : + * @path is rebuilt as "dirName\entry" and must be released on every way out of the body */ + do { + size_t const fnameLength = strlen(cFile.cFileName); + path = (char*) malloc(dirLength + fnameLength + 2); + if (!path) { FindClose(hFile); return 0; } + memcpy(path, dirName, dirLength); + path[dirLength] = '\\'; + memcpy(path+dirLength+1, cFile.cFileName, fnameLength); + pathLength = dirLength+1+fnameLength; + path[pathLength] = 0; + if (cFile.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + if ( strcmp (cFile.cFileName, "..") == 0 + || strcmp (cFile.cFileName, ".") == 0 ) { + /* `continue` jumps straight to FindNextFileA(), + * skipping the free() at the end of the loop body */ + free(path); + continue; + } + /* Recursively call "UTIL_prepareFileList" with the new path. */ + nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd, followLinks); + if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; } + } else if ( (cFile.dwFileAttributes & FILE_ATTRIBUTE_NORMAL) + || (cFile.dwFileAttributes & FILE_ATTRIBUTE_ARCHIVE) + || (cFile.dwFileAttributes & FILE_ATTRIBUTE_COMPRESSED) ) { + /* grow the name buffer by LIST_SIZE_INCREASE when the new path would not fit */ + if (*bufStart + *pos + pathLength >= *bufEnd) { + ptrdiff_t const newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE; + *bufStart = (char*)UTIL_realloc(*bufStart, newListSize); + if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; } + *bufEnd = *bufStart + newListSize; + } + if (*bufStart + *pos + pathLength < *bufEnd) { + memcpy(*bufStart + *pos, path, pathLength+1 /* include final \0 */); + *pos += pathLength + 1; + nbFiles++; + } } + free(path); + } while (FindNextFileA(hFile, &cFile)); + + FindClose(hFile); + return nbFiles; +} + +#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */ + +static int UTIL_prepareFileList(const char *dirName, + char** bufStart, size_t* pos, + char** bufEnd, int followLinks) +{ + DIR* dir; + struct dirent * entry; + size_t dirLength; +
int nbFiles = 0; + + if (!(dir = opendir(dirName))) { + UTIL_DISPLAYLEVEL(1, "Cannot open directory '%s': %s\n", dirName, strerror(errno)); + return 0; + } + + dirLength = strlen(dirName); + errno = 0; + while ((entry = readdir(dir)) != NULL) { + char* path; + size_t fnameLength, pathLength; + if (strcmp (entry->d_name, "..") == 0 || + strcmp (entry->d_name, ".") == 0) continue; + fnameLength = strlen(entry->d_name); + path = (char*) malloc(dirLength + fnameLength + 2); + if (!path) { closedir(dir); return 0; } + memcpy(path, dirName, dirLength); + + path[dirLength] = '/'; + memcpy(path+dirLength+1, entry->d_name, fnameLength); + pathLength = dirLength+1+fnameLength; + path[pathLength] = 0; + + if (!followLinks && UTIL_isLink(path)) { + UTIL_DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring\n", path); + free(path); + continue; + } + + if (UTIL_isDirectory(path)) { + nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd, followLinks); /* Recursively call "UTIL_prepareFileList" with the new path. 
*/ + if (*bufStart == NULL) { free(path); closedir(dir); return 0; } + } else { + if (*bufStart + *pos + pathLength >= *bufEnd) { + ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE; + assert(newListSize >= 0); + *bufStart = (char*)UTIL_realloc(*bufStart, (size_t)newListSize); + *bufEnd = *bufStart + newListSize; + if (*bufStart == NULL) { free(path); closedir(dir); return 0; } + } + if (*bufStart + *pos + pathLength < *bufEnd) { + memcpy(*bufStart + *pos, path, pathLength + 1); /* with final \0 */ + *pos += pathLength + 1; + nbFiles++; + } } + free(path); + errno = 0; /* clear errno after UTIL_isDirectory, UTIL_prepareFileList */ + } + + if (errno != 0) { + UTIL_DISPLAYLEVEL(1, "readdir(%s) error: %s \n", dirName, strerror(errno)); + free(*bufStart); + *bufStart = NULL; + } + closedir(dir); + return nbFiles; +} + +#else + +static int UTIL_prepareFileList(const char *dirName, + char** bufStart, size_t* pos, + char** bufEnd, int followLinks) +{ + (void)bufStart; (void)bufEnd; (void)pos; (void)followLinks; + UTIL_DISPLAYLEVEL(1, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE) \n", dirName); + return 0; +} + +#endif /* #ifdef _WIN32 */ + +int UTIL_isCompressedFile(const char *inputName, const char *extensionList[]) +{ + const char* ext = UTIL_getFileExtension(inputName); + while(*extensionList!=NULL) + { + const int isCompressedExtension = strcmp(ext,*extensionList); + if(isCompressedExtension==0) + return 1; + ++extensionList; + } + return 0; +} + +/*Utility function to get file extension from file */ +const char* UTIL_getFileExtension(const char* infilename) +{ + const char* extension = strrchr(infilename, '.'); + if(!extension || extension==infilename) return ""; + return extension; +} + +static int pathnameHas2Dots(const char *pathname) +{ + /* We need to figure out whether any ".." 
present in the path is a whole + * path token, which is the case if it is bordered on both sides by either + * the beginning/end of the path or by a directory separator. + */ + const char *needle = pathname; + while (1) { + needle = strstr(needle, ".."); + + if (needle == NULL) { + return 0; + } + + if ((needle == pathname || needle[-1] == PATH_SEP) + && (needle[2] == '\0' || needle[2] == PATH_SEP)) { + return 1; + } + + /* increment so we search for the next match */ + needle++; + }; + return 0; +} + +/* a file name is accepted for mirrored output only if it contains no ".." path token */ +static int isFileNameValidForMirroredOutput(const char *filename) +{ + return !pathnameHas2Dots(filename); +} + + +#define DIR_DEFAULT_MODE 0755 +/* getDirMode() : + * @return : the st_mode reported by stat for @dirName, + * or DIR_DEFAULT_MODE (after displaying a message) when stat fails + * or when @dirName is not a directory */ +static mode_t getDirMode(const char *dirName) +{ + stat_t st; + if (!UTIL_stat(dirName, &st)) { + UTIL_DISPLAY("zstd: failed to get DIR stats %s: %s\n", dirName, strerror(errno)); + return DIR_DEFAULT_MODE; + } + if (!UTIL_isDirectoryStat(&st)) { + UTIL_DISPLAY("zstd: expected directory: %s\n", dirName); + return DIR_DEFAULT_MODE; + } + return st.st_mode; +} + +/* makeDir() : + * @return : 0 when the directory was created or already exists (EEXIST), + * otherwise the non-zero result of mkdir, after displaying the error. + * @mode is ignored on the _mkdir() (MSVC / MinGW) code path. */ +static int makeDir(const char *dir, mode_t mode) +{ +#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) + int ret = _mkdir(dir); + (void) mode; +#else + int ret = mkdir(dir, mode); +#endif + if (ret != 0) { + if (errno == EEXIST) + return 0; + UTIL_DISPLAY("zstd: failed to create DIR %s: %s\n", dir, strerror(errno)); + } + return ret; +} + +/* this function requires a mutable input string */ +static void convertPathnameToDirName(char *pathname) +{ + size_t len = 0; + char* pos = NULL; + /* get dir name from pathname similar to 'dirname()' */ + assert(pathname != NULL); + + /* remove trailing '/' chars + * NOTE(review): as written, this loop never executes : right after strlen(), + * pathname[len] is the terminating '\0', which cannot equal PATH_SEP + * (assuming PATH_SEP is an ordinary separator character -- it is defined outside this file). + * A path ending with a separator only loses that last separator, through the strrchr() step below. + * Changing the index to len-1 would alter the result for such paths, + * so it needs to be checked against the callers first. */ + len = strlen(pathname); + assert(len > 0); + while (pathname[len] == PATH_SEP) { + pathname[len] = '\0'; + len--; + } + if (len == 0) return; + + /* if input is a single file, return '.' instead. i.e. + * "xyz/abc/file.txt" => "xyz/abc" + "./file.txt" => "." + "file.txt" => "."
+ */ + pos = strrchr(pathname, PATH_SEP); + if (pos == NULL) { + pathname[0] = '.'; + pathname[1] = '\0'; + } else { + *pos = '\0'; + } +} + +/* pathname must be valid */ +static const char* trimLeadingRootChar(const char *pathname) +{ + assert(pathname != NULL); + if (pathname[0] == PATH_SEP) + return pathname + 1; + return pathname; +} + +/* pathname must be valid */ +static const char* trimLeadingCurrentDirConst(const char *pathname) +{ + assert(pathname != NULL); + if ((pathname[0] == '.') && (pathname[1] == PATH_SEP)) + return pathname + 2; + return pathname; +} + +static char* +trimLeadingCurrentDir(char *pathname) +{ + /* 'union charunion' can do const-cast without compiler warning */ + union charunion { + char *chr; + const char* cchr; + } ptr; + ptr.cchr = trimLeadingCurrentDirConst(pathname); + return ptr.chr; +} + +/* remove leading './' or '/' chars here */ +static const char * trimPath(const char *pathname) +{ + return trimLeadingRootChar( + trimLeadingCurrentDirConst(pathname)); +} + +/* mallocAndJoin2Dir() : + * @return : a newly allocated string, to be free()d by the caller, containing + * - a copy of @dir1 alone, when @dir2 starts with '.', + * - otherwise @dir1 and @dir2 joined by exactly one PATH_SEP + * (none is added when @dir1 already ends with one). + * Allocation failure terminates the program through CONTROL(). */ +static char* mallocAndJoin2Dir(const char *dir1, const char *dir2) +{ + size_t dir1Size, dir2Size; + char *outDirBuffer, *buffer, trailingChar; + + /* validate the pointers before strlen() dereferences them */ + assert(dir1 != NULL && dir2 != NULL); + dir1Size = strlen(dir1); + dir2Size = strlen(dir2); + outDirBuffer = (char *) malloc(dir1Size + dir2Size + 2); + CONTROL(outDirBuffer != NULL); + + memcpy(outDirBuffer, dir1, dir1Size); + outDirBuffer[dir1Size] = '\0'; + + if (dir2[0] == '.') + return outDirBuffer; + + buffer = outDirBuffer + dir1Size; + /* an empty @dir1 has no last character : do not read before the start of the buffer */ + trailingChar = (dir1Size > 0) ? *(buffer - 1) : '\0'; + if (trailingChar != PATH_SEP) { + *buffer = PATH_SEP; + buffer++; + } + memcpy(buffer, dir2, dir2Size); + buffer[dir2Size] = '\0'; + + return outDirBuffer; +} + +/* this function will return NULL if input srcFileName is not valid name for mirrored output path */ +char* UTIL_createMirroredDestDirName(const char* srcFileName, const char* outDirRootName) +{ + char* pathname = NULL; + if (!isFileNameValidForMirroredOutput(srcFileName)) + return NULL; + +
pathname = mallocAndJoin2Dir(outDirRootName, trimPath(srcFileName)); + + convertPathnameToDirName(pathname); + return pathname; +} + +static int +mirrorSrcDir(char* srcDirName, const char* outDirName) +{ + mode_t srcMode; + int status = 0; + char* newDir = mallocAndJoin2Dir(outDirName, trimPath(srcDirName)); + if (!newDir) + return -ENOMEM; + + srcMode = getDirMode(srcDirName); + status = makeDir(newDir, srcMode); + free(newDir); + return status; +} + +static int +mirrorSrcDirRecursive(char* srcDirName, const char* outDirName) +{ + int status = 0; + char* pp = trimLeadingCurrentDir(srcDirName); + char* sp = NULL; + + while ((sp = strchr(pp, PATH_SEP)) != NULL) { + if (sp != pp) { + *sp = '\0'; + status = mirrorSrcDir(srcDirName, outDirName); + if (status != 0) + return status; + *sp = PATH_SEP; + } + pp = sp + 1; + } + status = mirrorSrcDir(srcDirName, outDirName); + return status; +} + +static void +makeMirroredDestDirsWithSameSrcDirMode(char** srcDirNames, unsigned nbFile, const char* outDirName) +{ + unsigned int i = 0; + for (i = 0; i < nbFile; i++) + mirrorSrcDirRecursive(srcDirNames[i], outDirName); +} + +static int +firstIsParentOrSameDirOfSecond(const char* firstDir, const char* secondDir) +{ + size_t firstDirLen = strlen(firstDir), + secondDirLen = strlen(secondDir); + return firstDirLen <= secondDirLen && + (secondDir[firstDirLen] == PATH_SEP || secondDir[firstDirLen] == '\0') && + 0 == strncmp(firstDir, secondDir, firstDirLen); +} + +static int compareDir(const void* pathname1, const void* pathname2) { + /* sort it after remove the leading '/' or './'*/ + const char* s1 = trimPath(*(char * const *) pathname1); + const char* s2 = trimPath(*(char * const *) pathname2); + return strcmp(s1, s2); +} + +static void +makeUniqueMirroredDestDirs(char** srcDirNames, unsigned nbFile, const char* outDirName) +{ + unsigned int i = 0, uniqueDirNr = 0; + char** uniqueDirNames = NULL; + + if (nbFile == 0) + return; + + uniqueDirNames = (char** ) malloc(nbFile * sizeof 
(char *)); + CONTROL(uniqueDirNames != NULL); + + /* if dirs is "a/b/c" and "a/b/c/d", we only need call: + * we just need "a/b/c/d" */ + qsort((void *)srcDirNames, nbFile, sizeof(char*), compareDir); + + uniqueDirNr = 1; + uniqueDirNames[uniqueDirNr - 1] = srcDirNames[0]; + for (i = 1; i < nbFile; i++) { + char* prevDirName = srcDirNames[i - 1]; + char* currDirName = srcDirNames[i]; + + /* note: we always compare trimmed path, i.e.: + * src dir of "./foo" and "/foo" will be both saved into: + * "outDirName/foo/" */ + if (!firstIsParentOrSameDirOfSecond(trimPath(prevDirName), + trimPath(currDirName))) + uniqueDirNr++; + + /* we need maintain original src dir name instead of trimmed + * dir, so we can retrieve the original src dir's mode_t */ + uniqueDirNames[uniqueDirNr - 1] = currDirName; + } + + makeMirroredDestDirsWithSameSrcDirMode(uniqueDirNames, uniqueDirNr, outDirName); + + free(uniqueDirNames); +} + +static void +makeMirroredDestDirs(char** srcFileNames, unsigned nbFile, const char* outDirName) +{ + unsigned int i = 0; + for (i = 0; i < nbFile; ++i) + convertPathnameToDirName(srcFileNames[i]); + makeUniqueMirroredDestDirs(srcFileNames, nbFile, outDirName); +} + +void UTIL_mirrorSourceFilesDirectories(const char** inFileNames, unsigned int nbFile, const char* outDirName) +{ + unsigned int i = 0, validFilenamesNr = 0; + char** srcFileNames = (char **) malloc(nbFile * sizeof (char *)); + CONTROL(srcFileNames != NULL); + + /* check input filenames is valid */ + for (i = 0; i < nbFile; ++i) { + if (isFileNameValidForMirroredOutput(inFileNames[i])) { + char* fname = STRDUP(inFileNames[i]); + CONTROL(fname != NULL); + srcFileNames[validFilenamesNr++] = fname; + } + } + + if (validFilenamesNr > 0) { + makeDir(outDirName, DIR_DEFAULT_MODE); + makeMirroredDestDirs(srcFileNames, validFilenamesNr, outDirName); + } + + for (i = 0; i < validFilenamesNr; i++) + free(srcFileNames[i]); + free(srcFileNames); +} + +FileNamesTable* +UTIL_createExpandedFNT(const char* const* 
inputNames, size_t nbIfns, int followLinks) +{ + unsigned nbFiles; + char* buf = (char*)malloc(LIST_SIZE_INCREASE); + char* bufend = buf + LIST_SIZE_INCREASE; + + if (!buf) return NULL; + + { size_t ifnNb, pos; + for (ifnNb=0, pos=0, nbFiles=0; ifnNb<nbIfns; ifnNb++) { + if (!UTIL_isDirectory(inputNames[ifnNb])) { + size_t const len = strlen(inputNames[ifnNb]); + if (buf + pos + len >= bufend) { + ptrdiff_t newListSize = (bufend - buf) + LIST_SIZE_INCREASE; + assert(newListSize >= 0); + buf = (char*)UTIL_realloc(buf, (size_t)newListSize); + if (!buf) return NULL; + bufend = buf + newListSize; + } + if (buf + pos + len < bufend) { + memcpy(buf+pos, inputNames[ifnNb], len+1); /* including final \0 */ + pos += len + 1; + nbFiles++; + } + } else { + nbFiles += (unsigned)UTIL_prepareFileList(inputNames[ifnNb], &buf, &pos, &bufend, followLinks); + if (buf == NULL) return NULL; + } } } + + /* note : even if nbFiles==0, function returns a valid, though empty, FileNamesTable* object */ + + { size_t ifnNb, pos; + size_t const fntCapacity = nbFiles + 1; /* minimum 1, allows adding one reference, typically stdin */ + const char** const fileNamesTable = (const char**)malloc(fntCapacity * sizeof(*fileNamesTable)); + if (!fileNamesTable) { free(buf); return NULL; } + + for (ifnNb = 0, pos = 0; ifnNb < nbFiles; ifnNb++) { + fileNamesTable[ifnNb] = buf + pos; + if (buf + pos > bufend) { free(buf); free((void*)fileNamesTable); return NULL; } + pos += strlen(fileNamesTable[ifnNb]) + 1; + } + return UTIL_assembleFileNamesTable2(fileNamesTable, nbFiles, fntCapacity, buf); + } +} + + +void UTIL_expandFNT(FileNamesTable** fnt, int followLinks) +{ + FileNamesTable* const newFNT = UTIL_createExpandedFNT((*fnt)->fileNames, (*fnt)->tableSize, followLinks); + CONTROL(newFNT != NULL); + UTIL_freeFileNamesTable(*fnt); + *fnt = newFNT; +} + +FileNamesTable* UTIL_createFNT_fromROTable(const char** filenames, size_t nbFilenames) +{ + size_t const sizeof_FNTable = nbFilenames * sizeof(*filenames); 
+ const char** const newFNTable = (const char**)malloc(sizeof_FNTable); + if (newFNTable==NULL) return NULL; + memcpy((void*)newFNTable, filenames, sizeof_FNTable); /* void* : mitigate a Visual compiler bug or limitation */ + return UTIL_assembleFileNamesTable(newFNTable, nbFilenames, NULL); +} + + +/*-**************************************** +* count the number of cores +******************************************/ + +#if defined(_WIN32) || defined(WIN32) + +#include <windows.h> + +typedef BOOL(WINAPI* LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); + +DWORD CountSetBits(ULONG_PTR bitMask) +{ + DWORD LSHIFT = sizeof(ULONG_PTR)*8 - 1; + DWORD bitSetCount = 0; + ULONG_PTR bitTest = (ULONG_PTR)1 << LSHIFT; + DWORD i; + + for (i = 0; i <= LSHIFT; ++i) + { + bitSetCount += ((bitMask & bitTest)?1:0); + bitTest/=2; + } + + return bitSetCount; +} + +int UTIL_countCores(int logical) +{ + static int numCores = 0; + if (numCores != 0) return numCores; + + { LPFN_GLPI glpi; + BOOL done = FALSE; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL; + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = NULL; + DWORD returnLength = 0; + size_t byteOffset = 0; + +#if defined(_MSC_VER) +/* Visual Studio does not like the following cast */ +# pragma warning( disable : 4054 ) /* conversion from function ptr to data ptr */ +# pragma warning( disable : 4055 ) /* conversion from data ptr to function ptr */ +#endif + glpi = (LPFN_GLPI)(void*)GetProcAddress(GetModuleHandle(TEXT("kernel32")), + "GetLogicalProcessorInformation"); + + if (glpi == NULL) { + goto failed; + } + + while(!done) { + DWORD rc = glpi(buffer, &returnLength); + if (FALSE == rc) { + if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { + if (buffer) + free(buffer); + buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(returnLength); + + if (buffer == NULL) { + perror("zstd"); + exit(1); + } + } else { + /* some other error */ + goto failed; + } + } else { + done = TRUE; + } } + + ptr = buffer; + + while (byteOffset 
+ sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength) { + + if (ptr->Relationship == RelationProcessorCore) { + if (logical) + numCores += CountSetBits(ptr->ProcessorMask); + else + numCores++; + } + + ptr++; + byteOffset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); + } + + free(buffer); + + return numCores; + } + +failed: + /* try to fall back on GetSystemInfo */ + { SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); + numCores = sysinfo.dwNumberOfProcessors; + if (numCores == 0) numCores = 1; /* just in case */ + } + return numCores; +} + +#elif defined(__APPLE__) + +#include <sys/sysctl.h> + +/* Use apple-provided syscall + * see: man 3 sysctl */ +int UTIL_countCores(int logical) +{ + static S32 numCores = 0; /* apple specifies int32_t */ + if (numCores != 0) return numCores; + + { size_t size = sizeof(S32); + int const ret = sysctlbyname(logical ? "hw.logicalcpu" : "hw.physicalcpu", &numCores, &size, NULL, 0); + if (ret != 0) { + if (errno == ENOENT) { + /* entry not present, fall back on 1 */ + numCores = 1; + } else { + perror("zstd: can't get number of cpus"); + exit(1); + } + } + + return numCores; + } +} + +#elif defined(__linux__) + +/* parse /proc/cpuinfo + * siblings / cpu cores should give hyperthreading ratio + * otherwise fall back on sysconf */ +int UTIL_countCores(int logical) +{ + static int numCores = 0; + + if (numCores != 0) return numCores; + + numCores = (int)sysconf(_SC_NPROCESSORS_ONLN); + if (numCores == -1) { + /* value not queryable, fall back on 1 */ + return numCores = 1; + } + + /* try to determine if there's hyperthreading */ + { FILE* const cpuinfo = fopen("/proc/cpuinfo", "r"); +#define BUF_SIZE 80 + char buff[BUF_SIZE]; + + int siblings = 0; + int cpu_cores = 0; + int ratio = 1; + + if (cpuinfo == NULL) { + /* fall back on the sysconf value */ + return numCores; + } + + /* assume the cpu cores/siblings values will be constant across all + * present processors */ + while (!feof(cpuinfo)) { + if (fgets(buff, BUF_SIZE, 
cpuinfo) != NULL) { + if (strncmp(buff, "siblings", 8) == 0) { + const char* const sep = strchr(buff, ':'); + if (sep == NULL || *sep == '\0') { + /* formatting was broken? */ + goto failed; + } + + siblings = atoi(sep + 1); + } + if (strncmp(buff, "cpu cores", 9) == 0) { + const char* const sep = strchr(buff, ':'); + if (sep == NULL || *sep == '\0') { + /* formatting was broken? */ + goto failed; + } + + cpu_cores = atoi(sep + 1); + } + } else if (ferror(cpuinfo)) { + /* fall back on the sysconf value */ + goto failed; + } } + if (siblings && cpu_cores && siblings > cpu_cores) { + ratio = siblings / cpu_cores; + } + + if (ratio && numCores > ratio && !logical) { + numCores = numCores / ratio; + } + +failed: + fclose(cpuinfo); + return numCores; + } +} + +#elif defined(__FreeBSD__) + +#include <sys/param.h> +#include <sys/sysctl.h> + +/* Use physical core sysctl when available + * see: man 4 smp, man 3 sysctl */ +int UTIL_countCores(int logical) +{ + static int numCores = 0; /* freebsd sysctl is native int sized */ +#if __FreeBSD_version >= 1300008 + static int perCore = 1; +#endif + if (numCores != 0) return numCores; + +#if __FreeBSD_version >= 1300008 + { size_t size = sizeof(numCores); + int ret = sysctlbyname("kern.smp.cores", &numCores, &size, NULL, 0); + if (ret == 0) { + if (logical) { + ret = sysctlbyname("kern.smp.threads_per_core", &perCore, &size, NULL, 0); + /* default to physical cores if logical cannot be read */ + if (ret == 0) + numCores *= perCore; + } + + return numCores; + } + if (errno != ENOENT) { + perror("zstd: can't get number of cpus"); + exit(1); + } + /* sysctl not present, fall through to older sysconf method */ + } +#else + /* suppress unused parameter warning */ + (void) logical; +#endif + + numCores = (int)sysconf(_SC_NPROCESSORS_ONLN); + if (numCores == -1) { + /* value not queryable, fall back on 1 */ + numCores = 1; + } + return numCores; +} + +#elif defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || 
defined(__CYGWIN__) + +/* Use POSIX sysconf + * see: man 3 sysconf */ +int UTIL_countCores(int logical) +{ + static int numCores = 0; + + /* suppress unused parameter warning */ + (void)logical; + + if (numCores != 0) return numCores; + + numCores = (int)sysconf(_SC_NPROCESSORS_ONLN); + if (numCores == -1) { + /* value not queryable, fall back on 1 */ + return numCores = 1; + } + return numCores; +} + +#else + +int UTIL_countCores(int logical) +{ + /* assume 1 */ + return 1; +} + +#endif + +int UTIL_countPhysicalCores(void) +{ + return UTIL_countCores(0); +} + +int UTIL_countLogicalCores(void) +{ + return UTIL_countCores(1); +} + +#if defined (__cplusplus) +} +#endif diff --git a/contrib/libs/zstd/programs/util.h b/contrib/libs/zstd/programs/util.h new file mode 100644 index 0000000000..add165d57c --- /dev/null +++ b/contrib/libs/zstd/programs/util.h @@ -0,0 +1,322 @@ +/* + * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef UTIL_H_MODULE +#define UTIL_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/*-**************************************** +* Dependencies +******************************************/ +#include "platform.h" /* PLATFORM_POSIX_VERSION, ZSTD_NANOSLEEP_SUPPORT, ZSTD_SETPRIORITY_SUPPORT */ +#include <stddef.h> /* size_t, ptrdiff_t */ +#include <sys/types.h> /* stat, utime */ +#include <sys/stat.h> /* stat, chmod */ +#include "../lib/common/mem.h" /* U64 */ + + +/*-************************************************************ +* Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW +***************************************************************/ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) +# define UTIL_fseek _fseeki64 +#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ +# define UTIL_fseek fseeko +#elif defined(__MINGW32__) && defined(__MSVCRT__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) +# define UTIL_fseek fseeko64 +#else +# define UTIL_fseek fseek +#endif + + +/*-************************************************* +* Sleep & priority functions: Windows - Posix - others +***************************************************/ +#if defined(_WIN32) +# include <windows.h> +# define SET_REALTIME_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS) +# define UTIL_sleep(s) Sleep(1000*s) +# define UTIL_sleepMilli(milli) Sleep(milli) + +#elif PLATFORM_POSIX_VERSION > 0 /* Unix-like operating system */ +# include <unistd.h> /* sleep */ +# define UTIL_sleep(s) sleep(s) +# if ZSTD_NANOSLEEP_SUPPORT /* necessarily defined in platform.h */ +# define UTIL_sleepMilli(milli) { struct timespec t; t.tv_sec=0; t.tv_nsec=milli*1000000ULL; nanosleep(&t, NULL); } +# else +# define UTIL_sleepMilli(milli) /* disabled */ +# endif +# if ZSTD_SETPRIORITY_SUPPORT +# include <sys/resource.h> /* setpriority */ +# define SET_REALTIME_PRIORITY setpriority(PRIO_PROCESS, 0, -20) 
+# else +# define SET_REALTIME_PRIORITY /* disabled */ +# endif + +#else /* unknown non-unix operating system */ +# define UTIL_sleep(s) /* disabled */ +# define UTIL_sleepMilli(milli) /* disabled */ +# define SET_REALTIME_PRIORITY /* disabled */ +#endif + + +/*-**************************************** +* Compiler specifics +******************************************/ +#if defined(__INTEL_COMPILER) +# pragma warning(disable : 177) /* disable: message #177: function was declared but never referenced, useful with UTIL_STATIC */ +#endif +#if defined(__GNUC__) +# define UTIL_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define UTIL_STATIC static inline +#elif defined(_MSC_VER) +# define UTIL_STATIC static __inline +#else +# define UTIL_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/*-**************************************** +* Console log +******************************************/ +extern int g_utilDisplayLevel; + +/** + * Displays a message prompt and returns success (0) if first character from stdin + * matches any from acceptableLetters. Otherwise, returns failure (1) and displays abortMsg. + * If any of the inputs are stdin itself, then automatically return failure (1). 
+ */ +int UTIL_requireUserConfirmation(const char* prompt, const char* abortMsg, const char* acceptableLetters, int hasStdinInput); + + +/*-**************************************** +* File functions +******************************************/ +#if defined(_MSC_VER) + typedef struct __stat64 stat_t; + typedef int mode_t; +#elif defined(__MINGW32__) && defined (__MSVCRT__) + typedef struct _stati64 stat_t; +#else + typedef struct stat stat_t; +#endif + +#if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */ +#define PATH_SEP '\\' +#define STRDUP(s) _strdup(s) +#else +#define PATH_SEP '/' +#include <libgen.h> +#define STRDUP(s) strdup(s) +#endif + + +/** + * Calls platform's equivalent of stat() on filename and writes info to statbuf. + * Returns success (1) or failure (0). + */ +int UTIL_stat(const char* filename, stat_t* statbuf); + +/** + * Instead of getting a file's stats, this updates them with the info in the + * provided stat_t. Currently sets owner, group, atime, and mtime. Will only + * update this info for regular files. + */ +int UTIL_setFileStat(const char* filename, const stat_t* statbuf); + +/** + * Set atime to now and mtime to the st_mtim in statbuf. + * + * Directly wraps utime() or utimensat(). Returns -1 on error. + * Does not validate filename is valid. + */ +int UTIL_utime(const char* filename, const stat_t *statbuf); + +/* + * These helpers operate on a pre-populated stat_t, i.e., the result of + * calling one of the above functions. + */ + +int UTIL_isRegularFileStat(const stat_t* statbuf); +int UTIL_isDirectoryStat(const stat_t* statbuf); +int UTIL_isFIFOStat(const stat_t* statbuf); +int UTIL_isBlockDevStat(const stat_t* statbuf); +U64 UTIL_getFileSizeStat(const stat_t* statbuf); + +/** + * Like chmod(), but only modifies regular files. Provided statbuf may be NULL, + * in which case this function will stat() the file internally, in order to + * check whether it should be modified. 
+ */ +int UTIL_chmod(char const* filename, const stat_t* statbuf, mode_t permissions); + +/* + * In the absence of a pre-existing stat result on the file in question, these + * functions will do a stat() call internally and then use that result to + * compute the needed information. + */ + +int UTIL_isRegularFile(const char* infilename); +int UTIL_isDirectory(const char* infilename); +int UTIL_isSameFile(const char* file1, const char* file2); +int UTIL_isCompressedFile(const char* infilename, const char *extensionList[]); +int UTIL_isLink(const char* infilename); +int UTIL_isFIFO(const char* infilename); + +#define UTIL_FILESIZE_UNKNOWN ((U64)(-1)) +U64 UTIL_getFileSize(const char* infilename); +U64 UTIL_getTotalFileSize(const char* const * fileNamesTable, unsigned nbFiles); + +/** + * Take @size in bytes, + * prepare the components to pretty-print it in a scaled way. + * The components in the returned struct should be passed in + * precision, value, suffix order to a "%.*f%s" format string. + * Output policy is sensible to @g_utilDisplayLevel, + * for verbose mode (@g_utilDisplayLevel >= 4), + * does not scale down. + */ +typedef struct { + double value; + int precision; + const char* suffix; +} UTIL_HumanReadableSize_t; + +UTIL_HumanReadableSize_t UTIL_makeHumanReadableSize(U64 size); + +int UTIL_compareStr(const void *p1, const void *p2); +const char* UTIL_getFileExtension(const char* infilename); +void UTIL_mirrorSourceFilesDirectories(const char** fileNamesTable, unsigned int nbFiles, const char *outDirName); +char* UTIL_createMirroredDestDirName(const char* srcFileName, const char* outDirRootName); + + + +/*-**************************************** + * Lists of Filenames + ******************************************/ + +typedef struct +{ const char** fileNames; + char* buf; /* fileNames are stored in this buffer (or are read-only) */ + size_t tableSize; /* nb of fileNames */ + size_t tableCapacity; +} FileNamesTable; + +/*! 
UTIL_createFileNamesTable_fromFileName() : + * read filenames from @inputFileName, and store them into returned object. + * @return : a FileNamesTable*, or NULL in case of error (ex: @inputFileName doesn't exist). + * Note: inputFileSize must be less than 50MB + */ +FileNamesTable* +UTIL_createFileNamesTable_fromFileName(const char* inputFileName); + +/*! UTIL_assembleFileNamesTable() : + * This function takes ownership of its arguments, @filenames and @buf, + * and store them inside the created object. + * note : this function never fails, + * it will rather exit() the program if internal allocation fails. + * @return : resulting FileNamesTable* object. + */ +FileNamesTable* +UTIL_assembleFileNamesTable(const char** filenames, size_t tableSize, char* buf); + +/*! UTIL_freeFileNamesTable() : + * This function is compatible with NULL argument and never fails. + */ +void UTIL_freeFileNamesTable(FileNamesTable* table); + +/*! UTIL_mergeFileNamesTable(): + * @return : FileNamesTable*, concatenation of @table1 and @table2 + * note: @table1 and @table2 are consumed (freed) by this operation + */ +FileNamesTable* +UTIL_mergeFileNamesTable(FileNamesTable* table1, FileNamesTable* table2); + + +/*! UTIL_expandFNT() : + * read names from @fnt, and expand those corresponding to directories + * update @fnt, now containing only file names, + * @return : 0 in case of success, 1 if error + * note : in case of error, @fnt[0] is NULL + */ +void UTIL_expandFNT(FileNamesTable** fnt, int followLinks); + +/*! UTIL_createFNT_fromROTable() : + * copy the @filenames pointer table inside the returned object. + * The names themselves are still stored in their original buffer, which must outlive the object. + * @return : a FileNamesTable* object, + * or NULL in case of error + */ +FileNamesTable* +UTIL_createFNT_fromROTable(const char** filenames, size_t nbFilenames); + +/*! UTIL_allocateFileNamesTable() : + * Allocates a table of const char*, to insert read-only names later on. 
+ * The created FileNamesTable* doesn't hold a buffer. + * @return : FileNamesTable*, or NULL, if allocation fails. + */ +FileNamesTable* UTIL_allocateFileNamesTable(size_t tableSize); + + +/*! UTIL_refFilename() : + * Add a reference to read-only name into @fnt table. + * As @filename is only referenced, its lifetime must outlive @fnt. + * Internal table must be large enough to reference a new member, + * otherwise its UB (protected by an `assert()`). + */ +void UTIL_refFilename(FileNamesTable* fnt, const char* filename); + + +/* UTIL_createExpandedFNT() is only active if UTIL_HAS_CREATEFILELIST is defined. + * Otherwise, UTIL_createExpandedFNT() is a shell function which does nothing + * apart from displaying a warning message. + */ +#ifdef _WIN32 +# define UTIL_HAS_CREATEFILELIST +#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */ +# define UTIL_HAS_CREATEFILELIST +# define UTIL_HAS_MIRRORFILELIST +#else + /* do not define UTIL_HAS_CREATEFILELIST */ +#endif + +/*! UTIL_createExpandedFNT() : + * read names from @filenames, and expand those corresponding to directories. + * links are followed or not depending on @followLinks directive. 
+ * @return : an expanded FileNamesTable*, where each name is a file + * or NULL in case of error + */ +FileNamesTable* +UTIL_createExpandedFNT(const char* const* filenames, size_t nbFilenames, int followLinks); + +#if defined(_WIN32) || defined(WIN32) +DWORD CountSetBits(ULONG_PTR bitMask); +#endif + +/*-**************************************** + * System + ******************************************/ + +int UTIL_countCores(int logical); + +int UTIL_countPhysicalCores(void); + +int UTIL_countLogicalCores(void); + +#if defined (__cplusplus) +} +#endif + +#endif /* UTIL_H_MODULE */ diff --git a/contrib/libs/zstd/programs/zstd/ya.make b/contrib/libs/zstd/programs/zstd/ya.make new file mode 100644 index 0000000000..684f19e513 --- /dev/null +++ b/contrib/libs/zstd/programs/zstd/ya.make @@ -0,0 +1,48 @@ +# Generated by devtools/yamaker. + +PROGRAM() + +WITHOUT_LICENSE_TEXTS() + +OWNER( + orivej + velavokr + g:cpp-contrib +) + +LICENSE(BSD-3-Clause) + +PEERDIR( + contrib/libs/zstd +) + +ADDINCL( + contrib/libs/zstd/lib + contrib/libs/zstd/lib/common + contrib/libs/zstd/programs +) + +NO_COMPILER_WARNINGS() + +NO_RUNTIME() + +CFLAGS( + -DZSTD_LEGACY_SUPPORT=1 + -DZSTD_MULTITHREAD +) + +SRCDIR(contrib/libs/zstd/programs) + +SRCS( + benchfn.c + benchzstd.c + datagen.c + dibio.c + fileio.c + timefn.c + util.c + zstdcli.c + zstdcli_trace.c +) + +END() diff --git a/contrib/libs/zstd/programs/zstdcli.c b/contrib/libs/zstd/programs/zstdcli.c new file mode 100644 index 0000000000..bfe18c0c1b --- /dev/null +++ b/contrib/libs/zstd/programs/zstdcli.c @@ -0,0 +1,1494 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/*-************************************ +* Tuning parameters +**************************************/ +#ifndef ZSTDCLI_CLEVEL_DEFAULT +# define ZSTDCLI_CLEVEL_DEFAULT 3 +#endif + +#ifndef ZSTDCLI_CLEVEL_MAX +# define ZSTDCLI_CLEVEL_MAX 19 /* without using --ultra */ +#endif + +#ifndef ZSTDCLI_NBTHREADS_DEFAULT +# define ZSTDCLI_NBTHREADS_DEFAULT 1 +#endif + +/*-************************************ +* Dependencies +**************************************/ +#include "platform.h" /* IS_CONSOLE, PLATFORM_POSIX_VERSION */ +#include "util.h" /* UTIL_HAS_CREATEFILELIST, UTIL_createFileList */ +#include <stdlib.h> /* getenv */ +#include <string.h> /* strcmp, strlen */ +#include <stdio.h> /* fprintf(), stdin, stdout, stderr */ +#include <errno.h> /* errno */ +#include <assert.h> /* assert */ + +#include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */ +#ifndef ZSTD_NOBENCH +# include "benchzstd.h" /* BMK_benchFiles */ +#endif +#ifndef ZSTD_NODICT +# include "dibio.h" /* ZDICT_cover_params_t, DiB_trainFromFiles() */ +#endif +#ifndef ZSTD_NOTRACE +# include "zstdcli_trace.h" +#endif +#include "../lib/zstd.h" /* ZSTD_VERSION_STRING, ZSTD_minCLevel, ZSTD_maxCLevel */ + + +/*-************************************ +* Constants +**************************************/ +#define COMPRESSOR_NAME "zstd command line interface" +#ifndef ZSTD_VERSION +# define ZSTD_VERSION "v" ZSTD_VERSION_STRING +#endif +#define AUTHOR "Yann Collet" +#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR + +#define ZSTD_ZSTDMT "zstdmt" +#define ZSTD_UNZSTD "unzstd" +#define ZSTD_CAT "zstdcat" +#define ZSTD_ZCAT "zcat" +#define ZSTD_GZ "gzip" +#define ZSTD_GUNZIP "gunzip" +#define ZSTD_GZCAT "gzcat" +#define ZSTD_LZMA "lzma" +#define ZSTD_UNLZMA "unlzma" +#define ZSTD_XZ "xz" +#define ZSTD_UNXZ "unxz" +#define ZSTD_LZ4 "lz4" +#define ZSTD_UNLZ4 "unlz4" + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + 
+#define DISPLAY_LEVEL_DEFAULT 2 + +static const char* g_defaultDictName = "dictionary"; +static const unsigned g_defaultMaxDictSize = 110 KB; +static const int g_defaultDictCLevel = 3; +static const unsigned g_defaultSelectivityLevel = 9; +static const unsigned g_defaultMaxWindowLog = 27; +#define OVERLAP_LOG_DEFAULT 9999 +#define LDM_PARAM_DEFAULT 9999 /* Default for parameters where 0 is valid */ +static U32 g_overlapLog = OVERLAP_LOG_DEFAULT; +static U32 g_ldmHashLog = 0; +static U32 g_ldmMinMatch = 0; +static U32 g_ldmHashRateLog = LDM_PARAM_DEFAULT; +static U32 g_ldmBucketSizeLog = LDM_PARAM_DEFAULT; + + +#define DEFAULT_ACCEL 1 + +typedef enum { cover, fastCover, legacy } dictType; + +/*-************************************ +* Display Macros +**************************************/ +#define DISPLAY_F(f, ...) fprintf((f), __VA_ARGS__) +#define DISPLAYOUT(...) DISPLAY_F(stdout, __VA_ARGS__) +#define DISPLAY(...) DISPLAY_F(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } } +static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */ + + +/*-************************************ +* Check Version (when CLI linked to dynamic library) +**************************************/ + +/* Due to usage of experimental symbols and capabilities by the CLI, + * the CLI must be linked against a dynamic library of same version */ +static void checkLibVersion(void) +{ + if (strcmp(ZSTD_VERSION_STRING, ZSTD_versionString())) { + DISPLAYLEVEL(1, "Error : incorrect library version (expecting : %s ; actual : %s ) \n", + ZSTD_VERSION_STRING, ZSTD_versionString()); + DISPLAYLEVEL(1, "Please update library to version %s, or use stand-alone zstd binary \n", + ZSTD_VERSION_STRING); + exit(1); + } +} + + +/*-************************************ +* Command Line +**************************************/ +/* print help either in `stderr` or 
`stdout` depending on originating request + * error (badusage) => stderr + * help (usage_advanced) => stdout + */ +static void usage(FILE* f, const char* programName) +{ + DISPLAY_F(f, "Usage : \n"); + DISPLAY_F(f, " %s [args] [FILE(s)] [-o file] \n", programName); + DISPLAY_F(f, "\n"); + DISPLAY_F(f, "FILE : a filename \n"); + DISPLAY_F(f, " with no FILE, or when FILE is - , read standard input\n"); + DISPLAY_F(f, "Arguments : \n"); +#ifndef ZSTD_NOCOMPRESS + DISPLAY_F(f, " -# : # compression level (1-%d, default: %d) \n", ZSTDCLI_CLEVEL_MAX, ZSTDCLI_CLEVEL_DEFAULT); +#endif +#ifndef ZSTD_NODECOMPRESS + DISPLAY_F(f, " -d : decompression \n"); +#endif + DISPLAY_F(f, " -D DICT: use DICT as Dictionary for compression or decompression \n"); + DISPLAY_F(f, " -o file: result stored into `file` (only 1 output file) \n"); + DISPLAY_F(f, " -f : disable input and output checks. Allows overwriting existing files,\n"); + DISPLAY_F(f, " input from console, output to stdout, operating on links,\n"); + DISPLAY_F(f, " block devices, etc.\n"); + DISPLAY_F(f, "--rm : remove source file(s) after successful de/compression \n"); + DISPLAY_F(f, " -k : preserve source file(s) (default) \n"); + DISPLAY_F(f, " -h/-H : display help/long help and exit \n"); +} + +static void usage_advanced(const char* programName) +{ + DISPLAYOUT(WELCOME_MESSAGE); + usage(stdout, programName); + DISPLAYOUT( "\n"); + DISPLAYOUT( "Advanced arguments : \n"); + DISPLAYOUT( " -V : display Version number and exit \n"); + + DISPLAYOUT( " -c : write to standard output (even if it is the console) \n"); + + DISPLAYOUT( " -v : verbose mode; specify multiple times to increase verbosity \n"); + DISPLAYOUT( " -q : suppress warnings; specify twice to suppress errors too \n"); + DISPLAYOUT( "--[no-]progress : forcibly display, or never display the progress counter.\n"); + DISPLAYOUT( " note: any (de)compressed output to terminal will mix with progress counter text. 
\n"); + +#ifdef UTIL_HAS_CREATEFILELIST + DISPLAYOUT( " -r : operate recursively on directories \n"); + DISPLAYOUT( "--filelist FILE : read list of files to operate upon from FILE \n"); + DISPLAYOUT( "--output-dir-flat DIR : processed files are stored into DIR \n"); +#endif + +#ifdef UTIL_HAS_MIRRORFILELIST + DISPLAYOUT( "--output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure \n"); +#endif + + +#ifndef ZSTD_NOCOMPRESS + DISPLAYOUT( "--[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled)"); +#ifndef ZSTD_NODECOMPRESS + DISPLAYOUT( ". If specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate)."); +#endif +#else +#ifdef ZSTD_NOCOMPRESS + DISPLAYOUT( "--[no-]check : during decompression, ignore/validate checksums in compressed frame (default: validate)."); +#endif +#endif /* ZSTD_NOCOMPRESS */ + +#ifndef ZSTD_NOTRACE + DISPLAYOUT( "\n"); + DISPLAYOUT( "--trace FILE : log tracing information to FILE."); +#endif + DISPLAYOUT( "\n"); + + DISPLAYOUT( "-- : All arguments after \"--\" are treated as files \n"); + +#ifndef ZSTD_NOCOMPRESS + DISPLAYOUT( "\n"); + DISPLAYOUT( "Advanced compression arguments : \n"); + DISPLAYOUT( "--ultra : enable levels beyond %i, up to %i (requires more memory) \n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel()); + DISPLAYOUT( "--long[=#]: enable long distance matching with given window log (default: %u) \n", g_defaultMaxWindowLog); + DISPLAYOUT( "--fast[=#]: switch to very fast compression levels (default: %u) \n", 1); + DISPLAYOUT( "--adapt : dynamically adapt compression level to I/O conditions \n"); + DISPLAYOUT( "--[no-]row-match-finder : force enable/disable usage of fast row-based matchfinder for greedy, lazy, and lazy2 strategies \n"); + DISPLAYOUT( "--patch-from=FILE : specify the file to be used as a reference point for zstd's diff engine. 
\n"); +# ifdef ZSTD_MULTITHREAD + DISPLAYOUT( " -T# : spawns # compression threads (default: 1, 0==# cores) \n"); + DISPLAYOUT( " -B# : select size of each job (default: 0==automatic) \n"); + DISPLAYOUT( "--single-thread : use a single thread for both I/O and compression (result slightly different than -T1) \n"); + DISPLAYOUT( "--auto-threads={physical,logical} (default: physical} : use either physical cores or logical cores as default when specifying -T0 \n"); + DISPLAYOUT( "--rsyncable : compress using a rsync-friendly method (-B sets block size) \n"); +# endif + DISPLAYOUT( "--exclude-compressed: only compress files that are not already compressed \n"); + DISPLAYOUT( "--stream-size=# : specify size of streaming input from `stdin` \n"); + DISPLAYOUT( "--size-hint=# optimize compression parameters for streaming input of approximately this size \n"); + DISPLAYOUT( "--target-compressed-block-size=# : generate compressed block of approximately targeted size \n"); + DISPLAYOUT( "--no-dictID : don't write dictID into header (dictionary compression only) \n"); + DISPLAYOUT( "--[no-]compress-literals : force (un)compressed literals \n"); + + DISPLAYOUT( "--format=zstd : compress files to the .zst format (default) \n"); +#ifdef ZSTD_GZCOMPRESS + DISPLAYOUT( "--format=gzip : compress files to the .gz format \n"); +#endif +#ifdef ZSTD_LZMACOMPRESS + DISPLAYOUT( "--format=xz : compress files to the .xz format \n"); + DISPLAYOUT( "--format=lzma : compress files to the .lzma format \n"); +#endif +#ifdef ZSTD_LZ4COMPRESS + DISPLAYOUT( "--format=lz4 : compress files to the .lz4 format \n"); +#endif +#endif /* !ZSTD_NOCOMPRESS */ + +#ifndef ZSTD_NODECOMPRESS + DISPLAYOUT( "\n"); + DISPLAYOUT( "Advanced decompression arguments : \n"); + DISPLAYOUT( " -l : print information about zstd compressed files \n"); + DISPLAYOUT( "--test : test compressed file integrity \n"); + DISPLAYOUT( " -M# : Set a memory usage limit for decompression \n"); +# if ZSTD_SPARSE_DEFAULT + DISPLAYOUT( 
"--[no-]sparse : sparse mode (default: enabled on file, disabled on stdout) \n"); +# else + DISPLAYOUT( "--[no-]sparse : sparse mode (default: disabled) \n"); +# endif +#endif /* ZSTD_NODECOMPRESS */ + +#ifndef ZSTD_NODICT + DISPLAYOUT( "\n"); + DISPLAYOUT( "Dictionary builder : \n"); + DISPLAYOUT( "--train ## : create a dictionary from a training set of files \n"); + DISPLAYOUT( "--train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args \n"); + DISPLAYOUT( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]] : use the fast cover algorithm with optional args \n"); + DISPLAYOUT( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u) \n", g_defaultSelectivityLevel); + DISPLAYOUT( " -o DICT : DICT is dictionary name (default: %s) \n", g_defaultDictName); + DISPLAYOUT( "--maxdict=# : limit dictionary to specified size (default: %u) \n", g_defaultMaxDictSize); + DISPLAYOUT( "--dictID=# : force dictionary ID to specified value (default: random) \n"); +#endif + +#ifndef ZSTD_NOBENCH + DISPLAYOUT( "\n"); + DISPLAYOUT( "Benchmark arguments : \n"); + DISPLAYOUT( " -b# : benchmark file(s), using # compression level (default: %d) \n", ZSTDCLI_CLEVEL_DEFAULT); + DISPLAYOUT( " -e# : test all compression levels successively from -b# to -e# (default: 1) \n"); + DISPLAYOUT( " -i# : minimum evaluation time in seconds (default: 3s) \n"); + DISPLAYOUT( " -B# : cut file into independent blocks of size # (default: no block) \n"); + DISPLAYOUT( " -S : output one benchmark result per input file (default: consolidated result) \n"); + DISPLAYOUT( "--priority=rt : set process priority to real-time \n"); +#endif + +} + +static void badusage(const char* programName) +{ + DISPLAYLEVEL(1, "Incorrect parameters \n"); + if (g_displayLevel >= 2) usage(stderr, programName); +} + +static void waitEnter(void) +{ + int unused; + DISPLAY("Press enter to continue... 
\n"); + unused = getchar(); + (void)unused; +} + +static const char* lastNameFromPath(const char* path) +{ + const char* name = path; + if (strrchr(name, '/')) name = strrchr(name, '/') + 1; + if (strrchr(name, '\\')) name = strrchr(name, '\\') + 1; /* windows */ + return name; +} + +/*! exeNameMatch() : + @return : a non-zero value if exeName matches test, excluding the extension + */ +static int exeNameMatch(const char* exeName, const char* test) +{ + return !strncmp(exeName, test, strlen(test)) && + (exeName[strlen(test)] == '\0' || exeName[strlen(test)] == '.'); +} + +static void errorOut(const char* msg) +{ + DISPLAY("%s \n", msg); exit(1); +} + +/*! readU32FromCharChecked() : + * @return 0 if success, and store the result in *value. + * allows and interprets K, KB, KiB, M, MB and MiB suffix. + * Will also modify `*stringPtr`, advancing it to position where it stopped reading. + * @return 1 if an overflow error occurs */ +static int readU32FromCharChecked(const char** stringPtr, unsigned* value) +{ + unsigned result = 0; + while ((**stringPtr >='0') && (**stringPtr <='9')) { + unsigned const max = ((unsigned)(-1)) / 10; + unsigned last = result; + if (result > max) return 1; /* overflow error */ + result *= 10; + result += (unsigned)(**stringPtr - '0'); + if (result < last) return 1; /* overflow error */ + (*stringPtr)++ ; + } + if ((**stringPtr=='K') || (**stringPtr=='M')) { + unsigned const maxK = ((unsigned)(-1)) >> 10; + if (result > maxK) return 1; /* overflow error */ + result <<= 10; + if (**stringPtr=='M') { + if (result > maxK) return 1; /* overflow error */ + result <<= 10; + } + (*stringPtr)++; /* skip `K` or `M` */ + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + *value = result; + return 0; +} + +/*! readU32FromChar() : + * @return : unsigned integer value read from input in `char` format. + * allows and interprets K, KB, KiB, M, MB and MiB suffix. 
+ * Will also modify `*stringPtr`, advancing it to position where it stopped reading. + * Note : function will exit() program if digit sequence overflows */ +static unsigned readU32FromChar(const char** stringPtr) { + static const char errorMsg[] = "error: numeric value overflows 32-bit unsigned int"; + unsigned result; + if (readU32FromCharChecked(stringPtr, &result)) { errorOut(errorMsg); } + return result; +} + +/*! readIntFromChar() : + * @return : signed integer value read from input in `char` format. + * allows and interprets K, KB, KiB, M, MB and MiB suffix. + * Will also modify `*stringPtr`, advancing it to position where it stopped reading. + * Note : function will exit() program if digit sequence overflows */ +static int readIntFromChar(const char** stringPtr) { + static const char errorMsg[] = "error: numeric value overflows 32-bit int"; + int sign = 1; + unsigned result; + if (**stringPtr=='-') { + (*stringPtr)++; + sign = -1; + } + if (readU32FromCharChecked(stringPtr, &result)) { errorOut(errorMsg); } + return (int) result * sign; +} + +/*! readSizeTFromCharChecked() : + * @return 0 if success, and store the result in *value. + * allows and interprets K, KB, KiB, M, MB and MiB suffix. + * Will also modify `*stringPtr`, advancing it to position where it stopped reading. 
+ * @return 1 if an overflow error occurs */ +static int readSizeTFromCharChecked(const char** stringPtr, size_t* value) +{ + size_t result = 0; + while ((**stringPtr >='0') && (**stringPtr <='9')) { + size_t const max = ((size_t)(-1)) / 10; + size_t last = result; + if (result > max) return 1; /* overflow error */ + result *= 10; + result += (size_t)(**stringPtr - '0'); + if (result < last) return 1; /* overflow error */ + (*stringPtr)++ ; + } + if ((**stringPtr=='K') || (**stringPtr=='M')) { + size_t const maxK = ((size_t)(-1)) >> 10; + if (result > maxK) return 1; /* overflow error */ + result <<= 10; + if (**stringPtr=='M') { + if (result > maxK) return 1; /* overflow error */ + result <<= 10; + } + (*stringPtr)++; /* skip `K` or `M` */ + if (**stringPtr=='i') (*stringPtr)++; + if (**stringPtr=='B') (*stringPtr)++; + } + *value = result; + return 0; +} + +/*! readSizeTFromChar() : + * @return : size_t value read from input in `char` format. + * allows and interprets K, KB, KiB, M, MB and MiB suffix. + * Will also modify `*stringPtr`, advancing it to position where it stopped reading. + * Note : function will exit() program if digit sequence overflows */ +static size_t readSizeTFromChar(const char** stringPtr) { + static const char errorMsg[] = "error: numeric value overflows size_t"; + size_t result; + if (readSizeTFromCharChecked(stringPtr, &result)) { errorOut(errorMsg); } + return result; +} + +/** longCommandWArg() : + * check if *stringPtr is the same as longCommand. + * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. + * @return 0 and doesn't modify *stringPtr otherwise. 
+ */ +static int longCommandWArg(const char** stringPtr, const char* longCommand) +{ + size_t const comSize = strlen(longCommand); + int const result = !strncmp(*stringPtr, longCommand, comSize); + if (result) *stringPtr += comSize; + return result; +} + + +#ifndef ZSTD_NODICT + +static const unsigned kDefaultRegression = 1; +/** + * parseCoverParameters() : + * reads cover parameters from *stringPtr (e.g. "--train-cover=k=48,d=8,steps=32") into *params + * @return 1 means that cover parameters were correct + * @return 0 in case of malformed parameters + */ +static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params) +{ + memset(params, 0, sizeof(*params)); + for (; ;) { + if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "split=")) { + unsigned splitPercentage = readU32FromChar(&stringPtr); + params->splitPoint = (double)splitPercentage / 100.0; + if (stringPtr[0]==',') { stringPtr++; continue; } else break; + } + if (longCommandWArg(&stringPtr, "shrink")) { + params->shrinkDictMaxRegression = kDefaultRegression; + params->shrinkDict = 1; + if (stringPtr[0]=='=') { + stringPtr++; + params->shrinkDictMaxRegression = readU32FromChar(&stringPtr); + } + if (stringPtr[0]==',') { + stringPtr++; + continue; + } + else break; + } + return 0; + } + if (stringPtr[0] != 0) return 0; + DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplit=%u\nshrink%u\n", params->k, params->d, params->steps, (unsigned)(params->splitPoint * 100), params->shrinkDictMaxRegression); + return 1; +} + +/** + * parseFastCoverParameters() : + * 
reads fastcover parameters from *stringPtr (e.g. "--train-fastcover=k=48,d=8,f=20,steps=32,accel=2") into *params + * @return 1 means that fastcover parameters were correct + * @return 0 in case of malformed parameters + */ +static unsigned parseFastCoverParameters(const char* stringPtr, ZDICT_fastCover_params_t* params) +{ + memset(params, 0, sizeof(*params)); + for (; ;) { + if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "f=")) { params->f = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "accel=")) { params->accel = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "split=")) { + unsigned splitPercentage = readU32FromChar(&stringPtr); + params->splitPoint = (double)splitPercentage / 100.0; + if (stringPtr[0]==',') { stringPtr++; continue; } else break; + } + if (longCommandWArg(&stringPtr, "shrink")) { + params->shrinkDictMaxRegression = kDefaultRegression; + params->shrinkDict = 1; + if (stringPtr[0]=='=') { + stringPtr++; + params->shrinkDictMaxRegression = readU32FromChar(&stringPtr); + } + if (stringPtr[0]==',') { + stringPtr++; + continue; + } + else break; + } + return 0; + } + if (stringPtr[0] != 0) return 0; + DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\nshrink=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint * 100), params->accel, params->shrinkDictMaxRegression); + return 1; +} + +/** + * 
parseLegacyParameters() : + * reads legacy dictionary builder parameters from *stringPtr (e.g. "--train-legacy=selectivity=8") into *selectivity + * @return 1 means that legacy dictionary builder parameters were correct + * @return 0 in case of malformed parameters + */ +static unsigned parseLegacyParameters(const char* stringPtr, unsigned* selectivity) +{ + if (!longCommandWArg(&stringPtr, "s=") && !longCommandWArg(&stringPtr, "selectivity=")) { return 0; } + *selectivity = readU32FromChar(&stringPtr); + if (stringPtr[0] != 0) return 0; + DISPLAYLEVEL(4, "legacy: selectivity=%u\n", *selectivity); + return 1; +} + +static ZDICT_cover_params_t defaultCoverParams(void) +{ + ZDICT_cover_params_t params; + memset(¶ms, 0, sizeof(params)); + params.d = 8; + params.steps = 4; + params.splitPoint = 1.0; + params.shrinkDict = 0; + params.shrinkDictMaxRegression = kDefaultRegression; + return params; +} + +static ZDICT_fastCover_params_t defaultFastCoverParams(void) +{ + ZDICT_fastCover_params_t params; + memset(¶ms, 0, sizeof(params)); + params.d = 8; + params.f = 20; + params.steps = 4; + params.splitPoint = 0.75; /* different from default splitPoint of cover */ + params.accel = DEFAULT_ACCEL; + params.shrinkDict = 0; + params.shrinkDictMaxRegression = kDefaultRegression; + return params; +} +#endif + + +/** parseAdaptParameters() : + * reads adapt parameters from *stringPtr (e.g. "--zstd=min=1,max=19) and store them into adaptMinPtr and adaptMaxPtr. + * Both adaptMinPtr and adaptMaxPtr must be already allocated and correctly initialized. + * There is no guarantee that any of these values will be updated. 
+ * @return 1 means that parsing was successful, + * @return 0 in case of malformed parameters + */ +static unsigned parseAdaptParameters(const char* stringPtr, int* adaptMinPtr, int* adaptMaxPtr) +{ + for ( ; ;) { + if (longCommandWArg(&stringPtr, "min=")) { *adaptMinPtr = readIntFromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "max=")) { *adaptMaxPtr = readIntFromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + DISPLAYLEVEL(4, "invalid compression parameter \n"); + return 0; + } + if (stringPtr[0] != 0) return 0; /* check the end of string */ + if (*adaptMinPtr > *adaptMaxPtr) { + DISPLAYLEVEL(4, "incoherent adaptation limits \n"); + return 0; + } + return 1; +} + + +/** parseCompressionParameters() : + * reads compression parameters from *stringPtr (e.g. "--zstd=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6") into *params + * @return 1 means that compression parameters were correct + * @return 0 in case of malformed parameters + */ +static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressionParameters* params) +{ + for ( ; ;) { + if (longCommandWArg(&stringPtr, "windowLog=") || longCommandWArg(&stringPtr, "wlog=")) { params->windowLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "chainLog=") || longCommandWArg(&stringPtr, "clog=")) { params->chainLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "hashLog=") || longCommandWArg(&stringPtr, "hlog=")) { params->hashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "searchLog=") || longCommandWArg(&stringPtr, "slog=")) { params->searchLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if 
(longCommandWArg(&stringPtr, "minMatch=") || longCommandWArg(&stringPtr, "mml=")) { params->minMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "targetLength=") || longCommandWArg(&stringPtr, "tlen=")) { params->targetLength = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "strategy=") || longCommandWArg(&stringPtr, "strat=")) { params->strategy = (ZSTD_strategy)(readU32FromChar(&stringPtr)); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "overlapLog=") || longCommandWArg(&stringPtr, "ovlog=")) { g_overlapLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "ldmHashLog=") || longCommandWArg(&stringPtr, "lhlog=")) { g_ldmHashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "ldmMinMatch=") || longCommandWArg(&stringPtr, "lmml=")) { g_ldmMinMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "ldmBucketSizeLog=") || longCommandWArg(&stringPtr, "lblog=")) { g_ldmBucketSizeLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + if (longCommandWArg(&stringPtr, "ldmHashRateLog=") || longCommandWArg(&stringPtr, "lhrlog=")) { g_ldmHashRateLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; } + DISPLAYLEVEL(4, "invalid compression parameter \n"); + return 0; + } + + DISPLAYLEVEL(4, "windowLog=%d, chainLog=%d, hashLog=%d, searchLog=%d \n", params->windowLog, params->chainLog, params->hashLog, params->searchLog); + DISPLAYLEVEL(4, "minMatch=%d, targetLength=%d, strategy=%d \n", params->minMatch, params->targetLength, 
params->strategy); + if (stringPtr[0] != 0) return 0; /* check the end of string */ + return 1; +} + +static void printVersion(void) +{ + if (g_displayLevel < DISPLAY_LEVEL_DEFAULT) { + DISPLAYOUT("%s\n", ZSTD_VERSION_STRING); + return; + } + + DISPLAYOUT(WELCOME_MESSAGE); + if (g_displayLevel >= 3) { + /* format support */ + DISPLAYOUT("*** supports: zstd"); + #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>0) && (ZSTD_LEGACY_SUPPORT<8) + DISPLAYOUT(", zstd legacy v0.%d+", ZSTD_LEGACY_SUPPORT); + #endif + #ifdef ZSTD_GZCOMPRESS + DISPLAYOUT(", gzip"); + #endif + #ifdef ZSTD_LZ4COMPRESS + DISPLAYOUT(", lz4"); + #endif + #ifdef ZSTD_LZMACOMPRESS + DISPLAYOUT(", lzma, xz "); + #endif + DISPLAYOUT("\n"); + if (g_displayLevel >= 4) { + /* posix support */ + #ifdef _POSIX_C_SOURCE + DISPLAYOUT("_POSIX_C_SOURCE defined: %ldL\n", (long) _POSIX_C_SOURCE); + #endif + #ifdef _POSIX_VERSION + DISPLAYOUT("_POSIX_VERSION defined: %ldL \n", (long) _POSIX_VERSION); + #endif + #ifdef PLATFORM_POSIX_VERSION + DISPLAYOUT("PLATFORM_POSIX_VERSION defined: %ldL\n", (long) PLATFORM_POSIX_VERSION); + #endif + } } +} + +#define ZSTD_NB_STRATEGIES 9 +static const char* ZSTD_strategyMap[ZSTD_NB_STRATEGIES + 1] = { "", "ZSTD_fast", + "ZSTD_dfast", "ZSTD_greedy", "ZSTD_lazy", "ZSTD_lazy2", "ZSTD_btlazy2", + "ZSTD_btopt", "ZSTD_btultra", "ZSTD_btultra2"}; + +#ifndef ZSTD_NOCOMPRESS + +static void printDefaultCParams(const char* filename, const char* dictFileName, int cLevel) { + unsigned long long fileSize = UTIL_getFileSize(filename); + const size_t dictSize = dictFileName != NULL ? 
(size_t)UTIL_getFileSize(dictFileName) : 0; + const ZSTD_compressionParameters cParams = ZSTD_getCParams(cLevel, fileSize, dictSize); + if (fileSize != UTIL_FILESIZE_UNKNOWN) DISPLAY("%s (%u bytes)\n", filename, (unsigned)fileSize); + else DISPLAY("%s (src size unknown)\n", filename); + DISPLAY(" - windowLog : %u\n", cParams.windowLog); + DISPLAY(" - chainLog : %u\n", cParams.chainLog); + DISPLAY(" - hashLog : %u\n", cParams.hashLog); + DISPLAY(" - searchLog : %u\n", cParams.searchLog); + DISPLAY(" - minMatch : %u\n", cParams.minMatch); + DISPLAY(" - targetLength : %u\n", cParams.targetLength); + assert(cParams.strategy < ZSTD_NB_STRATEGIES + 1); + DISPLAY(" - strategy : %s (%u)\n", ZSTD_strategyMap[(int)cParams.strategy], (unsigned)cParams.strategy); +} + +static void printActualCParams(const char* filename, const char* dictFileName, int cLevel, const ZSTD_compressionParameters* cParams) { + unsigned long long fileSize = UTIL_getFileSize(filename); + const size_t dictSize = dictFileName != NULL ? (size_t)UTIL_getFileSize(dictFileName) : 0; + ZSTD_compressionParameters actualCParams = ZSTD_getCParams(cLevel, fileSize, dictSize); + assert(g_displayLevel >= 4); + actualCParams.windowLog = cParams->windowLog == 0 ? actualCParams.windowLog : cParams->windowLog; + actualCParams.chainLog = cParams->chainLog == 0 ? actualCParams.chainLog : cParams->chainLog; + actualCParams.hashLog = cParams->hashLog == 0 ? actualCParams.hashLog : cParams->hashLog; + actualCParams.searchLog = cParams->searchLog == 0 ? actualCParams.searchLog : cParams->searchLog; + actualCParams.minMatch = cParams->minMatch == 0 ? actualCParams.minMatch : cParams->minMatch; + actualCParams.targetLength = cParams->targetLength == 0 ? actualCParams.targetLength : cParams->targetLength; + actualCParams.strategy = cParams->strategy == 0 ? 
actualCParams.strategy : cParams->strategy; + DISPLAY("--zstd=wlog=%d,clog=%d,hlog=%d,slog=%d,mml=%d,tlen=%d,strat=%d\n", + actualCParams.windowLog, actualCParams.chainLog, actualCParams.hashLog, actualCParams.searchLog, + actualCParams.minMatch, actualCParams.targetLength, actualCParams.strategy); +} + +#endif + +/* Environment variables for parameter setting */ +#define ENV_CLEVEL "ZSTD_CLEVEL" +#define ENV_NBTHREADS "ZSTD_NBTHREADS" /* takes lower precedence than directly specifying -T# in the CLI */ + +/* pick up environment variable */ +static int init_cLevel(void) { + const char* const env = getenv(ENV_CLEVEL); + if (env != NULL) { + const char* ptr = env; + int sign = 1; + if (*ptr == '-') { + sign = -1; + ptr++; + } else if (*ptr == '+') { + ptr++; + } + + if ((*ptr>='0') && (*ptr<='9')) { + unsigned absLevel; + if (readU32FromCharChecked(&ptr, &absLevel)) { + DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_CLEVEL, env); + return ZSTDCLI_CLEVEL_DEFAULT; + } else if (*ptr == 0) { + return sign * (int)absLevel; + } } + + DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid integer value \n", ENV_CLEVEL, env); + } + + return ZSTDCLI_CLEVEL_DEFAULT; +} + +#ifdef ZSTD_MULTITHREAD +static unsigned init_nbThreads(void) { + const char* const env = getenv(ENV_NBTHREADS); + if (env != NULL) { + const char* ptr = env; + if ((*ptr>='0') && (*ptr<='9')) { + unsigned nbThreads; + if (readU32FromCharChecked(&ptr, &nbThreads)) { + DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_NBTHREADS, env); + return ZSTDCLI_NBTHREADS_DEFAULT; + } else if (*ptr == 0) { + return nbThreads; + } + } + DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid unsigned value \n", ENV_NBTHREADS, env); + } + + return ZSTDCLI_NBTHREADS_DEFAULT; +} +#endif + +#define NEXT_FIELD(ptr) { \ + if (*argument == '=') { \ + ptr = ++argument; \ + argument += strlen(ptr); \ + } else { 
\ + argNb++; \ + if (argNb >= argCount) { \ + DISPLAY("error: missing command argument \n"); \ + CLEAN_RETURN(1); \ + } \ + ptr = argv[argNb]; \ + assert(ptr != NULL); \ + if (ptr[0]=='-') { \ + DISPLAY("error: command cannot be separated from its argument by another command \n"); \ + CLEAN_RETURN(1); \ +} } } + +#define NEXT_UINT32(val32) { \ + const char* __nb; \ + NEXT_FIELD(__nb); \ + val32 = readU32FromChar(&__nb); \ +} + +typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom_list } zstd_operation_mode; + +#define CLEAN_RETURN(i) { operationResult = (i); goto _end; } + +#ifdef ZSTD_NOCOMPRESS +/* symbols from compression library are not defined and should not be invoked */ +# define MINCLEVEL -99 +# define MAXCLEVEL 22 +#else +# define MINCLEVEL ZSTD_minCLevel() +# define MAXCLEVEL ZSTD_maxCLevel() +#endif + +int main(int argCount, const char* argv[]) +{ + int argNb, + followLinks = 0, + allowBlockDevices = 0, + forceStdin = 0, + forceStdout = 0, + hasStdout = 0, + ldmFlag = 0, + main_pause = 0, + nbWorkers = 0, + adapt = 0, + useRowMatchFinder = 0, + adaptMin = MINCLEVEL, + adaptMax = MAXCLEVEL, + rsyncable = 0, + nextArgumentsAreFiles = 0, + operationResult = 0, + separateFiles = 0, + setRealTimePrio = 0, + singleThread = 0, +#ifdef ZSTD_MULTITHREAD + defaultLogicalCores = 0, +#endif + showDefaultCParams = 0, + ultra=0, + contentSize=1; + double compressibility = 0.5; + unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */ + size_t blockSize = 0; + + FIO_prefs_t* const prefs = FIO_createPreferences(); + FIO_ctx_t* const fCtx = FIO_createContext(); + zstd_operation_mode operation = zom_compress; + ZSTD_compressionParameters compressionParams; + int cLevel = init_cLevel(); + int cLevelLast = MINCLEVEL - 1; /* lower than minimum */ + unsigned recursive = 0; + unsigned memLimit = 0; + FileNamesTable* filenames = UTIL_allocateFileNamesTable((size_t)argCount); /* argCount >= 1 */ + FileNamesTable* 
file_of_names = UTIL_allocateFileNamesTable((size_t)argCount); /* argCount >= 1 */ + const char* programName = argv[0]; + const char* outFileName = NULL; + const char* outDirName = NULL; + const char* outMirroredDirName = NULL; + const char* dictFileName = NULL; + const char* patchFromDictFileName = NULL; + const char* suffix = ZSTD_EXTENSION; + unsigned maxDictSize = g_defaultMaxDictSize; + unsigned dictID = 0; + size_t streamSrcSize = 0; + size_t targetCBlockSize = 0; + size_t srcSizeHint = 0; + int dictCLevel = g_defaultDictCLevel; + unsigned dictSelect = g_defaultSelectivityLevel; +#ifndef ZSTD_NODICT + ZDICT_cover_params_t coverParams = defaultCoverParams(); + ZDICT_fastCover_params_t fastCoverParams = defaultFastCoverParams(); + dictType dict = fastCover; +#endif +#ifndef ZSTD_NOBENCH + BMK_advancedParams_t benchParams = BMK_initAdvancedParams(); +#endif + ZSTD_paramSwitch_e literalCompressionMode = ZSTD_ps_auto; + + + /* init */ + checkLibVersion(); + (void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */ + (void)memLimit; + assert(argCount >= 1); + if ((filenames==NULL) || (file_of_names==NULL)) { DISPLAY("zstd: allocation error \n"); exit(1); } + programName = lastNameFromPath(programName); +#ifdef ZSTD_MULTITHREAD + nbWorkers = init_nbThreads(); +#endif + + /* preset behaviors */ + if (exeNameMatch(programName, ZSTD_ZSTDMT)) nbWorkers=0, singleThread=0; + if (exeNameMatch(programName, ZSTD_UNZSTD)) operation=zom_decompress; + if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; } /* supports multiple formats */ + if (exeNameMatch(programName, ZSTD_ZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; } /* behave like zcat, also supports multiple formats */ + if (exeNameMatch(programName, ZSTD_GZ)) { suffix = GZ_EXTENSION; 
FIO_setCompressionType(prefs, FIO_gzipCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like gzip */ + if (exeNameMatch(programName, ZSTD_GUNZIP)) { operation=zom_decompress; FIO_setRemoveSrcFile(prefs, 1); } /* behave like gunzip, also supports multiple formats */ + if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; } /* behave like gzcat, also supports multiple formats */ + if (exeNameMatch(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like lzma */ + if (exeNameMatch(programName, ZSTD_UNLZMA)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like unlzma, also supports multiple formats */ + if (exeNameMatch(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like xz */ + if (exeNameMatch(programName, ZSTD_UNXZ)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_xzCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like unxz, also supports multiple formats */ + if (exeNameMatch(programName, ZSTD_LZ4)) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); } /* behave like lz4 */ + if (exeNameMatch(programName, ZSTD_UNLZ4)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_lz4Compression); } /* behave like unlz4, also supports multiple formats */ + memset(&compressionParams, 0, sizeof(compressionParams)); + + /* init crash handler */ + FIO_addAbortHandler(); + + /* command switches */ + for (argNb=1; argNb<argCount; argNb++) { + const char* argument = argv[argNb]; + if (!argument) continue; /* Protection if argument empty */ + + if (nextArgumentsAreFiles) { + UTIL_refFilename(filenames, argument); + continue; + } + + /* "-" means 
stdin/stdout */ + if (!strcmp(argument, "-")){ + UTIL_refFilename(filenames, stdinmark); + continue; + } + + /* Decode commands (note : aggregated commands are allowed) */ + if (argument[0]=='-') { + + if (argument[1]=='-') { + /* long commands (--long-word) */ + if (!strcmp(argument, "--")) { nextArgumentsAreFiles=1; continue; } /* only file names allowed from now on */ + if (!strcmp(argument, "--list")) { operation=zom_list; continue; } + if (!strcmp(argument, "--compress")) { operation=zom_compress; continue; } + if (!strcmp(argument, "--decompress")) { operation=zom_decompress; continue; } + if (!strcmp(argument, "--uncompress")) { operation=zom_decompress; continue; } + if (!strcmp(argument, "--force")) { FIO_overwriteMode(prefs); forceStdin=1; forceStdout=1; followLinks=1; allowBlockDevices=1; continue; } + if (!strcmp(argument, "--version")) { printVersion(); CLEAN_RETURN(0); } + if (!strcmp(argument, "--help")) { usage_advanced(programName); CLEAN_RETURN(0); } + if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; } + if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; } + if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; g_displayLevel-=(g_displayLevel==2); continue; } + if (!strcmp(argument, "--ultra")) { ultra=1; continue; } + if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(prefs, 2); continue; } + if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(prefs, 0); continue; } + if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(prefs, 2); continue; } + if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(prefs, 0); continue; } + if (!strcmp(argument, "--test")) { operation=zom_test; continue; } + if (!strcmp(argument, "--train")) { operation=zom_train; if (outFileName==NULL) outFileName=g_defaultDictName; continue; } + if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(prefs, 0); continue; } + if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; } + if 
(!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; } + if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; } + if (!strcmp(argument, "--show-default-cparams")) { showDefaultCParams = 1; continue; } + if (!strcmp(argument, "--content-size")) { contentSize = 1; continue; } + if (!strcmp(argument, "--no-content-size")) { contentSize = 0; continue; } + if (!strcmp(argument, "--adapt")) { adapt = 1; continue; } + if (!strcmp(argument, "--no-row-match-finder")) { useRowMatchFinder = 1; continue; } + if (!strcmp(argument, "--row-match-finder")) { useRowMatchFinder = 2; continue; } + if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) { badusage(programName); CLEAN_RETURN(1); } continue; } + if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; } + if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(prefs, FIO_zstdCompression); continue; } +#ifdef ZSTD_GZCOMPRESS + if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); continue; } +#endif +#ifdef ZSTD_LZMACOMPRESS + if (!strcmp(argument, "--format=lzma")) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression); continue; } + if (!strcmp(argument, "--format=xz")) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); continue; } +#endif +#ifdef ZSTD_LZ4COMPRESS + if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); continue; } +#endif + if (!strcmp(argument, "--rsyncable")) { rsyncable = 1; continue; } + if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_ps_enable; continue; } + if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_ps_disable; continue; } + if (!strcmp(argument, "--no-progress")) { 
FIO_setProgressSetting(FIO_ps_never); continue; } + if (!strcmp(argument, "--progress")) { FIO_setProgressSetting(FIO_ps_always); continue; } + if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; } + + /* long commands with arguments */ +#ifndef ZSTD_NODICT + if (longCommandWArg(&argument, "--train-cover")) { + operation = zom_train; + if (outFileName == NULL) + outFileName = g_defaultDictName; + dict = cover; + /* Allow optional arguments following an = */ + if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); } + else if (*argument++ != '=') { badusage(programName); CLEAN_RETURN(1); } + else if (!parseCoverParameters(argument, &coverParams)) { badusage(programName); CLEAN_RETURN(1); } + continue; + } + if (longCommandWArg(&argument, "--train-fastcover")) { + operation = zom_train; + if (outFileName == NULL) + outFileName = g_defaultDictName; + dict = fastCover; + /* Allow optional arguments following an = */ + if (*argument == 0) { memset(&fastCoverParams, 0, sizeof(fastCoverParams)); } + else if (*argument++ != '=') { badusage(programName); CLEAN_RETURN(1); } + else if (!parseFastCoverParameters(argument, &fastCoverParams)) { badusage(programName); CLEAN_RETURN(1); } + continue; + } + if (longCommandWArg(&argument, "--train-legacy")) { + operation = zom_train; + if (outFileName == NULL) + outFileName = g_defaultDictName; + dict = legacy; + /* Allow optional arguments following an = */ + if (*argument == 0) { continue; } + else if (*argument++ != '=') { badusage(programName); CLEAN_RETURN(1); } + else if (!parseLegacyParameters(argument, &dictSelect)) { badusage(programName); CLEAN_RETURN(1); } + continue; + } +#endif + if (longCommandWArg(&argument, "--threads")) { NEXT_UINT32(nbWorkers); continue; } + if (longCommandWArg(&argument, "--memlimit")) { NEXT_UINT32(memLimit); continue; } + if (longCommandWArg(&argument, "--memory")) { NEXT_UINT32(memLimit); continue; } + if (longCommandWArg(&argument, 
"--memlimit-decompress")) { NEXT_UINT32(memLimit); continue; } + if (longCommandWArg(&argument, "--block-size=")) { blockSize = readSizeTFromChar(&argument); continue; } + if (longCommandWArg(&argument, "--maxdict")) { NEXT_UINT32(maxDictSize); continue; } + if (longCommandWArg(&argument, "--dictID")) { NEXT_UINT32(dictID); continue; } + if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badusage(programName); CLEAN_RETURN(1); } continue; } + if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readSizeTFromChar(&argument); continue; } + if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readSizeTFromChar(&argument); continue; } + if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readSizeTFromChar(&argument); continue; } + if (longCommandWArg(&argument, "--output-dir-flat")) { NEXT_FIELD(outDirName); continue; } +#ifdef ZSTD_MULTITHREAD + if (longCommandWArg(&argument, "--auto-threads")) { + const char* threadDefault = NULL; + NEXT_FIELD(threadDefault); + if (strcmp(threadDefault, "logical") == 0) + defaultLogicalCores = 1; + continue; + } +#endif +#ifdef UTIL_HAS_MIRRORFILELIST + if (longCommandWArg(&argument, "--output-dir-mirror")) { NEXT_FIELD(outMirroredDirName); continue; } +#endif +#ifndef ZSTD_NOTRACE + if (longCommandWArg(&argument, "--trace")) { char const* traceFile; NEXT_FIELD(traceFile); TRACE_enable(traceFile); continue; } +#endif + if (longCommandWArg(&argument, "--patch-from")) { NEXT_FIELD(patchFromDictFileName); continue; } + if (longCommandWArg(&argument, "--long")) { + unsigned ldmWindowLog = 0; + ldmFlag = 1; + /* Parse optional window log */ + if (*argument == '=') { + ++argument; + ldmWindowLog = readU32FromChar(&argument); + } else if (*argument != 0) { + /* Invalid character following --long */ + badusage(programName); + CLEAN_RETURN(1); + } + /* Only set windowLog if not already set by --zstd */ + if 
(compressionParams.windowLog == 0) + compressionParams.windowLog = ldmWindowLog; + continue; + } +#ifndef ZSTD_NOCOMPRESS /* linking ZSTD_minCLevel() requires compression support */ + if (longCommandWArg(&argument, "--fast")) { + /* Parse optional acceleration factor */ + if (*argument == '=') { + U32 const maxFast = (U32)-ZSTD_minCLevel(); + U32 fastLevel; + ++argument; + fastLevel = readU32FromChar(&argument); + if (fastLevel > maxFast) fastLevel = maxFast; + if (fastLevel) { + dictCLevel = cLevel = -(int)fastLevel; + } else { + badusage(programName); + CLEAN_RETURN(1); + } + } else if (*argument != 0) { + /* Invalid character following --fast */ + badusage(programName); + CLEAN_RETURN(1); + } else { + cLevel = -1; /* default for --fast */ + } + continue; + } +#endif + + if (longCommandWArg(&argument, "--filelist")) { + const char* listName; + NEXT_FIELD(listName); + UTIL_refFilename(file_of_names, listName); + continue; + } + + /* fall-through, will trigger bad_usage() later on */ + } + + argument++; + while (argument[0]!=0) { + +#ifndef ZSTD_NOCOMPRESS + /* compression Level */ + if ((*argument>='0') && (*argument<='9')) { + dictCLevel = cLevel = (int)readU32FromChar(&argument); + continue; + } +#endif + + switch(argument[0]) + { + /* Display help */ + case 'V': printVersion(); CLEAN_RETURN(0); /* Version Only */ + case 'H': + case 'h': usage_advanced(programName); CLEAN_RETURN(0); + + /* Compress */ + case 'z': operation=zom_compress; argument++; break; + + /* Decoding */ + case 'd': +#ifndef ZSTD_NOBENCH + benchParams.mode = BMK_decodeOnly; + if (operation==zom_bench) { argument++; break; } /* benchmark decode (hidden option) */ +#endif + operation=zom_decompress; argument++; break; + + /* Force stdout, even if stdout==console */ + case 'c': forceStdout=1; outFileName=stdoutmark; argument++; break; + + /* Use file content as dictionary */ + case 'D': argument++; NEXT_FIELD(dictFileName); break; + + /* Overwrite */ + case 'f': FIO_overwriteMode(prefs); 
forceStdin=1; forceStdout=1; followLinks=1; allowBlockDevices=1; argument++; break; + + /* Verbose mode */ + case 'v': g_displayLevel++; argument++; break; + + /* Quiet mode */ + case 'q': g_displayLevel--; argument++; break; + + /* keep source file (default) */ + case 'k': FIO_setRemoveSrcFile(prefs, 0); argument++; break; + + /* Checksum */ + case 'C': FIO_setChecksumFlag(prefs, 2); argument++; break; + + /* test compressed file */ + case 't': operation=zom_test; argument++; break; + + /* destination file name */ + case 'o': argument++; NEXT_FIELD(outFileName); break; + + /* limit memory */ + case 'M': + argument++; + memLimit = readU32FromChar(&argument); + break; + case 'l': operation=zom_list; argument++; break; +#ifdef UTIL_HAS_CREATEFILELIST + /* recursive */ + case 'r': recursive=1; argument++; break; +#endif + +#ifndef ZSTD_NOBENCH + /* Benchmark */ + case 'b': + operation=zom_bench; + argument++; + break; + + /* range bench (benchmark only) */ + case 'e': + /* compression Level */ + argument++; + cLevelLast = (int)readU32FromChar(&argument); + break; + + /* Modify Nb Iterations (benchmark only) */ + case 'i': + argument++; + bench_nbSeconds = readU32FromChar(&argument); + break; + + /* cut input into blocks (benchmark only) */ + case 'B': + argument++; + blockSize = readU32FromChar(&argument); + break; + + /* benchmark files separately (hidden option) */ + case 'S': + argument++; + separateFiles = 1; + break; + +#endif /* ZSTD_NOBENCH */ + + /* nb of threads (hidden option) */ + case 'T': + argument++; + nbWorkers = (int)readU32FromChar(&argument); + break; + + /* Dictionary Selection level */ + case 's': + argument++; + dictSelect = readU32FromChar(&argument); + break; + + /* Pause at the end (-p) or set an additional param (-p#) (hidden option) */ + case 'p': argument++; +#ifndef ZSTD_NOBENCH + if ((*argument>='0') && (*argument<='9')) { + benchParams.additionalParam = (int)readU32FromChar(&argument); + } else +#endif + main_pause=1; + break; + + /* 
Select compressibility of synthetic sample */ + case 'P': + argument++; + compressibility = (double)readU32FromChar(&argument) / 100; + break; + + /* unknown command */ + default : badusage(programName); CLEAN_RETURN(1); + } + } + continue; + } /* if (argument[0]=='-') */ + + /* none of the above : add filename to list */ + UTIL_refFilename(filenames, argument); + } + + /* Welcome message (if verbose) */ + DISPLAYLEVEL(3, WELCOME_MESSAGE); + +#ifdef ZSTD_MULTITHREAD + if ((nbWorkers==0) && (!singleThread)) { + /* automatically set # workers based on # of reported cpus */ + if (defaultLogicalCores) { + nbWorkers = UTIL_countLogicalCores(); + DISPLAYLEVEL(3, "Note: %d logical core(s) detected \n", nbWorkers); + } else { + nbWorkers = UTIL_countPhysicalCores(); + DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbWorkers); + } + } +#else + (void)singleThread; (void)nbWorkers; +#endif + + g_utilDisplayLevel = g_displayLevel; + +#ifdef UTIL_HAS_CREATEFILELIST + if (!followLinks) { + unsigned u, fileNamesNb; + unsigned const nbFilenames = (unsigned)filenames->tableSize; + for (u=0, fileNamesNb=0; u<nbFilenames; u++) { + if ( UTIL_isLink(filenames->fileNames[u]) + && !UTIL_isFIFO(filenames->fileNames[u]) + ) { + DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring \n", filenames->fileNames[u]); + } else { + filenames->fileNames[fileNamesNb++] = filenames->fileNames[u]; + } } + if (fileNamesNb == 0 && nbFilenames > 0) /* all names are eliminated */ + CLEAN_RETURN(1); + filenames->tableSize = fileNamesNb; + } /* if (!followLinks) */ + + /* read names from a file */ + if (file_of_names->tableSize) { + size_t const nbFileLists = file_of_names->tableSize; + size_t flNb; + for (flNb=0; flNb < nbFileLists; flNb++) { + FileNamesTable* const fnt = UTIL_createFileNamesTable_fromFileName(file_of_names->fileNames[flNb]); + if (fnt==NULL) { + DISPLAYLEVEL(1, "zstd: error reading %s \n", file_of_names->fileNames[flNb]); + CLEAN_RETURN(1); + } + filenames = 
UTIL_mergeFileNamesTable(filenames, fnt); + } + } + + if (recursive) { /* at this stage, filenameTable is a list of paths, which can contain both files and directories */ + UTIL_expandFNT(&filenames, followLinks); + } +#else + (void)followLinks; +#endif + + if (operation == zom_list) { +#ifndef ZSTD_NODECOMPRESS + int const ret = FIO_listMultipleFiles((unsigned)filenames->tableSize, filenames->fileNames, g_displayLevel); + CLEAN_RETURN(ret); +#else + DISPLAY("file information is not supported \n"); + CLEAN_RETURN(1); +#endif + } + + /* Check if benchmark is selected */ + if (operation==zom_bench) { +#ifndef ZSTD_NOBENCH + benchParams.blockSize = blockSize; + benchParams.nbWorkers = nbWorkers; + benchParams.realTime = (unsigned)setRealTimePrio; + benchParams.nbSeconds = bench_nbSeconds; + benchParams.ldmFlag = ldmFlag; + benchParams.ldmMinMatch = (int)g_ldmMinMatch; + benchParams.ldmHashLog = (int)g_ldmHashLog; + benchParams.useRowMatchFinder = useRowMatchFinder; + if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) { + benchParams.ldmBucketSizeLog = (int)g_ldmBucketSizeLog; + } + if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) { + benchParams.ldmHashRateLog = (int)g_ldmHashRateLog; + } + benchParams.literalCompressionMode = literalCompressionMode; + + if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); + if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel(); + if (cLevelLast < cLevel) cLevelLast = cLevel; + if (cLevelLast > cLevel) + DISPLAYLEVEL(3, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); + if (filenames->tableSize > 0) { + if(separateFiles) { + unsigned i; + for(i = 0; i < filenames->tableSize; i++) { + int c; + DISPLAYLEVEL(3, "Benchmarking %s \n", filenames->fileNames[i]); + for(c = cLevel; c <= cLevelLast; c++) { + BMK_benchFilesAdvanced(&filenames->fileNames[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams); + } } + } else { + for(; cLevel <= cLevelLast; cLevel++) { + BMK_benchFilesAdvanced(filenames->fileNames, 
(unsigned)filenames->tableSize, dictFileName, cLevel, &compressionParams, g_displayLevel, &benchParams); + } } + } else { + for(; cLevel <= cLevelLast; cLevel++) { + BMK_syntheticTest(cLevel, compressibility, &compressionParams, g_displayLevel, &benchParams); + } } + +#else + (void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio; (void)separateFiles; (void)compressibility; +#endif + goto _end; + } + + /* Check if dictionary builder is selected */ + if (operation==zom_train) { +#ifndef ZSTD_NODICT + ZDICT_params_t zParams; + zParams.compressionLevel = dictCLevel; + zParams.notificationLevel = (unsigned)g_displayLevel; + zParams.dictID = dictID; + if (dict == cover) { + int const optimize = !coverParams.k || !coverParams.d; + coverParams.nbThreads = (unsigned)nbWorkers; + coverParams.zParams = zParams; + operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, NULL, &coverParams, NULL, optimize, memLimit); + } else if (dict == fastCover) { + int const optimize = !fastCoverParams.k || !fastCoverParams.d; + fastCoverParams.nbThreads = (unsigned)nbWorkers; + fastCoverParams.zParams = zParams; + operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, NULL, NULL, &fastCoverParams, optimize, memLimit); + } else { + ZDICT_legacy_params_t dictParams; + memset(&dictParams, 0, sizeof(dictParams)); + dictParams.selectivityLevel = dictSelect; + dictParams.zParams = zParams; + operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, &dictParams, NULL, NULL, 0, memLimit); + } +#else + (void)dictCLevel; (void)dictSelect; (void)dictID; (void)maxDictSize; /* not used when ZSTD_NODICT set */ + DISPLAYLEVEL(1, "training mode not available \n"); + operationResult = 1; +#endif + goto _end; + } + +#ifndef ZSTD_NODECOMPRESS + if (operation==zom_test) { FIO_setTestMode(prefs, 1); 
outFileName=nulmark; FIO_setRemoveSrcFile(prefs, 0); } /* test mode */ +#endif + + /* No input filename ==> use stdin and stdout */ + if (filenames->tableSize == 0) UTIL_refFilename(filenames, stdinmark); + if (!strcmp(filenames->fileNames[0], stdinmark) && !outFileName) + outFileName = stdoutmark; /* when input is stdin, default output is stdout */ + + /* Check if input/output defined as console; trigger an error in this case */ + if (!forceStdin + && !strcmp(filenames->fileNames[0], stdinmark) + && IS_CONSOLE(stdin) ) { + DISPLAYLEVEL(1, "stdin is a console, aborting\n"); + CLEAN_RETURN(1); + } + if ( outFileName && !strcmp(outFileName, stdoutmark) + && IS_CONSOLE(stdout) + && !strcmp(filenames->fileNames[0], stdinmark) + && !forceStdout + && operation!=zom_decompress ) { + DISPLAYLEVEL(1, "stdout is a console, aborting\n"); + CLEAN_RETURN(1); + } + +#ifndef ZSTD_NOCOMPRESS + /* check compression level limits */ + { int const maxCLevel = ultra ? ZSTD_maxCLevel() : ZSTDCLI_CLEVEL_MAX; + if (cLevel > maxCLevel) { + DISPLAYLEVEL(2, "Warning : compression level higher than max, reduced to %i \n", maxCLevel); + cLevel = maxCLevel; + } } +#endif + + if (showDefaultCParams) { + if (operation == zom_decompress) { + DISPLAY("error : can't use --show-default-cparams in decomrpession mode \n"); + CLEAN_RETURN(1); + } + } + + if (dictFileName != NULL && patchFromDictFileName != NULL) { + DISPLAY("error : can't use -D and --patch-from=# at the same time \n"); + CLEAN_RETURN(1); + } + + if (patchFromDictFileName != NULL && filenames->tableSize > 1) { + DISPLAY("error : can't use --patch-from=# on multiple files \n"); + CLEAN_RETURN(1); + } + + /* No status message in pipe mode (stdin - stdout) */ + hasStdout = outFileName && !strcmp(outFileName,stdoutmark); + + if ((hasStdout || !IS_CONSOLE(stderr)) && (g_displayLevel==2)) g_displayLevel=1; + + /* IO Stream/File */ + FIO_setHasStdoutOutput(fCtx, hasStdout); + FIO_setNbFilesTotal(fCtx, (int)filenames->tableSize); + 
FIO_determineHasStdinInput(fCtx, filenames); + FIO_setNotificationLevel(g_displayLevel); + FIO_setAllowBlockDevices(prefs, allowBlockDevices); + FIO_setPatchFromMode(prefs, patchFromDictFileName != NULL); + if (memLimit == 0) { + if (compressionParams.windowLog == 0) { + memLimit = (U32)1 << g_defaultMaxWindowLog; + } else { + memLimit = (U32)1 << (compressionParams.windowLog & 31); + } } + if (patchFromDictFileName != NULL) + dictFileName = patchFromDictFileName; + FIO_setMemLimit(prefs, memLimit); + if (operation==zom_compress) { +#ifndef ZSTD_NOCOMPRESS + FIO_setContentSize(prefs, contentSize); + FIO_setNbWorkers(prefs, nbWorkers); + FIO_setBlockSize(prefs, (int)blockSize); + if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(prefs, (int)g_overlapLog); + FIO_setLdmFlag(prefs, (unsigned)ldmFlag); + FIO_setLdmHashLog(prefs, (int)g_ldmHashLog); + FIO_setLdmMinMatch(prefs, (int)g_ldmMinMatch); + if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) FIO_setLdmBucketSizeLog(prefs, (int)g_ldmBucketSizeLog); + if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) FIO_setLdmHashRateLog(prefs, (int)g_ldmHashRateLog); + FIO_setAdaptiveMode(prefs, (unsigned)adapt); + FIO_setUseRowMatchFinder(prefs, useRowMatchFinder); + FIO_setAdaptMin(prefs, adaptMin); + FIO_setAdaptMax(prefs, adaptMax); + FIO_setRsyncable(prefs, rsyncable); + FIO_setStreamSrcSize(prefs, streamSrcSize); + FIO_setTargetCBlockSize(prefs, targetCBlockSize); + FIO_setSrcSizeHint(prefs, srcSizeHint); + FIO_setLiteralCompressionMode(prefs, literalCompressionMode); + if (adaptMin > cLevel) cLevel = adaptMin; + if (adaptMax < cLevel) cLevel = adaptMax; + + /* Compare strategies constant with the ground truth */ + { ZSTD_bounds strategyBounds = ZSTD_cParam_getBounds(ZSTD_c_strategy); + assert(ZSTD_NB_STRATEGIES == strategyBounds.upperBound); + (void)strategyBounds; } + + if (showDefaultCParams || g_displayLevel >= 4) { + size_t fileNb; + for (fileNb = 0; fileNb < (size_t)filenames->tableSize; fileNb++) { + if 
(showDefaultCParams) + printDefaultCParams(filenames->fileNames[fileNb], dictFileName, cLevel); + if (g_displayLevel >= 4) + printActualCParams(filenames->fileNames[fileNb], dictFileName, cLevel, &compressionParams); + } + } + + if (g_displayLevel >= 4) + FIO_displayCompressionParameters(prefs); + if ((filenames->tableSize==1) && outFileName) + operationResult = FIO_compressFilename(fCtx, prefs, outFileName, filenames->fileNames[0], dictFileName, cLevel, compressionParams); + else + operationResult = FIO_compressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams); +#else + (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; (void)useRowMatchFinder; /* not used when ZSTD_NOCOMPRESS set */ + DISPLAY("Compression not supported \n"); +#endif + } else { /* decompression or test */ +#ifndef ZSTD_NODECOMPRESS + if (filenames->tableSize == 1 && outFileName) { + operationResult = FIO_decompressFilename(fCtx, prefs, outFileName, filenames->fileNames[0], dictFileName); + } else { + operationResult = FIO_decompressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, dictFileName); + } +#else + DISPLAY("Decompression not supported \n"); +#endif + } + +_end: + FIO_freePreferences(prefs); + FIO_freeContext(fCtx); + if (main_pause) waitEnter(); + UTIL_freeFileNamesTable(filenames); + UTIL_freeFileNamesTable(file_of_names); +#ifndef ZSTD_NOTRACE + TRACE_finish(); +#endif + + return operationResult; +} diff --git a/contrib/libs/zstd/programs/zstdcli_trace.c b/contrib/libs/zstd/programs/zstdcli_trace.c new file mode 100644 index 0000000000..b3b977feb5 --- /dev/null +++ b/contrib/libs/zstd/programs/zstdcli_trace.c @@ -0,0 +1,172 @@ +/* + * Copyright (c) Facebook, Inc. 
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstdcli_trace.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "timefn.h"
+#include "util.h"
+
+#define ZSTD_STATIC_LINKING_ONLY
+#include "../lib/zstd.h"
+/* We depend on the trace header to avoid duplicating the ZSTD_trace struct.
+ * But, we check the version so it is compatible with dynamic linking.
+ */
+#include "../lib/common/zstd_trace.h"
+/* We only use macros from threading.h so it is compatible with dynamic linking */
+#include "../lib/common/threading.h"
+
+#if ZSTD_TRACE
+
+static FILE* g_traceFile = NULL;      /* trace output stream; NULL while tracing is disabled */
+static int g_mutexInit = 0;           /* 1 once g_mutex has been initialized successfully */
+static ZSTD_pthread_mutex_t g_mutex;  /* held around the fprintf() in TRACE_log() */
+static UTIL_time_t g_enableTime = UTIL_TIME_INITIALIZER;  /* set by TRACE_enable(); reference point for the hook timestamps */
+/**
+ * Enable tracing and append trace records to `filename`.
+ *
+ * A trace file that is already open is closed first. The file is then opened
+ * in append mode, and the CSV column header is written only when the open
+ * succeeded and `filename` did not pass UTIL_isRegularFile() beforehand
+ * (presumably: the file did not exist yet - confirm against util.h).
+ *
+ * A failed fopen() is not reported: g_traceFile stays NULL and the begin
+ * hooks further down return 0. g_mutex is initialized only while g_mutexInit
+ * is 0; if that initialization fails, TRACE_finish() closes the file again.
+ */
+void TRACE_enable(char const* filename)
+{
+    int const writeHeader = !UTIL_isRegularFile(filename);  /* sampled before fopen(), which creates a missing file in "a" mode */
+    if (g_traceFile)
+        fclose(g_traceFile);
+    g_traceFile = fopen(filename, "a");
+    if (g_traceFile && writeHeader) {
+        /* Fields:
+         * algorithm
+         * version
+         * method
+         * streaming
+         * level
+         * workers
+         * dictionary size
+         * uncompressed size
+         * compressed size
+         * duration nanos
+         * compression ratio
+         * speed MB/s
+         */
+        fprintf(g_traceFile, "Algorithm, Version, Method, Mode, Level, Workers, Dictionary Size, Uncompressed Size, Compressed Size, Duration Nanos, Compression Ratio, Speed MB/s\n");
+    }
+    g_enableTime = UTIL_getTime();  /* reference point for the UTIL_clockSpanNano() readings taken by the hooks */
+    if (!g_mutexInit) {
+        if (!ZSTD_pthread_mutex_init(&g_mutex, NULL)) {
+            g_mutexInit = 1;
+        } else {
+            TRACE_finish();  /* no usable mutex: close the file so the hooks see NULL */
+        }
+    }
+}
+/**
+ * Shut down tracing: close the trace file if one is open, reset g_traceFile
+ * to NULL and destroy g_mutex if it was initialized. Both steps are guarded,
+ * so calling this without a prior successful TRACE_enable() only re-assigns
+ * NULL.
+ */
+void TRACE_finish(void)
+{
+    if (g_traceFile) {
+        fclose(g_traceFile);
+    }
+    g_traceFile = NULL;  /* the begin hooks return 0 from now on */
+    if (g_mutexInit) {
+        ZSTD_pthread_mutex_destroy(&g_mutex);
+        g_mutexInit = 0;
+    }
+}
+/**
+ * Append one CSV record describing a finished operation to g_traceFile.
+ *
+ * @param method    text for the Method column; this file passes "compress"
+ *                  or "decompress".
+ * @param duration  elapsed time; the callers in this file pass nanoseconds.
+ * @param trace     version, sizes, streaming flag and optional parameters of
+ *                  the operation.
+ *
+ * level and workers are read from trace->params when it is non-NULL and stay
+ * 0 otherwise. speed is bytes * 1000 / nanoseconds, i.e. units of 10^6 bytes
+ * per second, matching the Speed MB/s column.
+ *
+ * NOTE(review): neither compressedSize nor duration is checked for zero
+ * before the floating-point divisions, and the end hooks can pass a duration
+ * of 0 - confirm that an inf/nan field in the output is acceptable.
+ *
+ * g_traceFile must be non-NULL (asserted). Only the fprintf() runs while
+ * g_mutex is held.
+ */
+static void TRACE_log(char const* method, PTime duration, ZSTD_Trace const* trace)
+{
+    int level = 0;
+    int workers = 0;
+    double const ratio = (double)trace->uncompressedSize / (double)trace->compressedSize;
+    double const speed = ((double)trace->uncompressedSize * 1000) / (double)duration;
+    if (trace->params) {
+        ZSTD_CCtxParams_getParameter(trace->params, ZSTD_c_compressionLevel, &level);
+        ZSTD_CCtxParams_getParameter(trace->params, ZSTD_c_nbWorkers, &workers);
+    }
+    assert(g_traceFile != NULL);
+
+    ZSTD_pthread_mutex_lock(&g_mutex);
+    /* Fields:
+     * algorithm
+     * version
+     * method
+     * streaming
+     * level
+     * workers
+     * dictionary size
+     * uncompressed size
+     * compressed size
+     * duration nanos
+     * compression ratio
+     * speed MB/s
+     */
+    fprintf(g_traceFile,
+        "zstd, %u, %s, %s, %d, %d, %llu, %llu, %llu, %llu, %.2f, %.2f\n",
+        trace->version,
+        method,
+        trace->streaming ? "streaming" : "single-pass",
+        level,
+        workers,
+        (unsigned long long)trace->dictionarySize,
+        (unsigned long long)trace->uncompressedSize,
+        (unsigned long long)trace->compressedSize,
+        (unsigned long long)duration,
+        ratio,
+        speed);
+    ZSTD_pthread_mutex_unlock(&g_mutex);
+}
+
+/**
+ * These symbols override the weak symbols provided by the library.
+ */
+/**
+ * Begin-of-compression hook. Returns 0 when no trace file is open; otherwise
+ * returns the UTIL_clockSpanNano() reading relative to g_enableTime. The end
+ * hook interprets its `ctx` argument as such a reading.
+ *
+ * NOTE(review): presumably the library skips the end hook when this returns
+ * 0 - the asserts in the end hooks depend on that; confirm in zstd_trace.h.
+ */
+ZSTD_TraceCtx ZSTD_trace_compress_begin(ZSTD_CCtx const* cctx)
+{
+    (void)cctx;  /* unused */
+    if (g_traceFile == NULL)
+        return 0;
+    return (ZSTD_TraceCtx)UTIL_clockSpanNano(g_enableTime);
+}
+/**
+ * End-of-compression hook. Treats `ctx` as the begin reading, takes a new
+ * reading relative to g_enableTime and logs the difference as a "compress"
+ * record; the difference is replaced by 0 when the new reading is not larger
+ * than the begin reading. Asserts that a trace file is open and that
+ * trace->version equals the ZSTD_VERSION_NUMBER this file was compiled with.
+ */
+void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
+{
+    PTime const beginNanos = (PTime)ctx;
+    PTime const endNanos = UTIL_clockSpanNano(g_enableTime);
+    PTime const durationNanos = endNanos > beginNanos ? endNanos - beginNanos : 0;
+    assert(g_traceFile != NULL);
+    assert(trace->version == ZSTD_VERSION_NUMBER); /* CLI version must match. */
+    TRACE_log("compress", durationNanos, trace);
+}
+/**
+ * Begin-of-decompression hook; same contract as ZSTD_trace_compress_begin():
+ * 0 when no trace file is open, otherwise the current reading relative to
+ * g_enableTime.
+ */
+ZSTD_TraceCtx ZSTD_trace_decompress_begin(ZSTD_DCtx const* dctx)
+{
+    (void)dctx;  /* unused */
+    if (g_traceFile == NULL)
+        return 0;
+    return (ZSTD_TraceCtx)UTIL_clockSpanNano(g_enableTime);
+}
+/**
+ * End-of-decompression hook; same steps as ZSTD_trace_compress_end(), but the
+ * record is logged with the method "decompress".
+ */
+void ZSTD_trace_decompress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
+{
+    PTime const beginNanos = (PTime)ctx;
+    PTime const endNanos = UTIL_clockSpanNano(g_enableTime);
+    PTime const durationNanos = endNanos > beginNanos ? endNanos - beginNanos : 0;
+    assert(g_traceFile != NULL);
+    assert(trace->version == ZSTD_VERSION_NUMBER); /* CLI version must match. */
+    TRACE_log("decompress", durationNanos, trace);
+}
+
+#else /* ZSTD_TRACE */
+/* ZSTD_TRACE evaluates to 0 here: keep the TRACE_* entry points as no-ops. */
+void TRACE_enable(char const* filename)
+{
+    (void)filename;
+}
+
+void TRACE_finish(void) {}
+
+#endif /* ZSTD_TRACE */
diff --git a/contrib/libs/zstd/programs/zstdcli_trace.h b/contrib/libs/zstd/programs/zstdcli_trace.h
new file mode 100644
index 0000000000..38c27dc04c
--- /dev/null
+++ b/contrib/libs/zstd/programs/zstdcli_trace.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTDCLI_TRACE_H
+#define ZSTDCLI_TRACE_H
+
+/**
+ * Enable tracing - log to filename.
+ */
+void TRACE_enable(char const* filename);
+
+/**
+ * Shut down the tracing library.
+ */
+void TRACE_finish(void);
+
+#endif /* ZSTDCLI_TRACE_H */ |