diff options
author | vvvv <vvvv@yandex-team.com> | 2024-11-07 12:29:36 +0300 |
---|---|---|
committer | vvvv <vvvv@yandex-team.com> | 2024-11-07 13:49:47 +0300 |
commit | d4c258e9431675bab6745c8638df6e3dfd4dca6b (patch) | |
tree | b5efcfa11351152a4c872fccaea35749141c0b11 /yql/essentials/parser/pg_wrapper/postgresql/src/common/compression.c | |
parent | 13a4f274caef5cfdaf0263b24e4d6bdd5521472b (diff) | |
download | ydb-d4c258e9431675bab6745c8638df6e3dfd4dca6b.tar.gz |
Moved other yql/essentials libs YQL-19206
init
commit_hash:7d4c435602078407bbf20dd3c32f9c90d2bbcbc0
Diffstat (limited to 'yql/essentials/parser/pg_wrapper/postgresql/src/common/compression.c')
-rw-r--r-- | yql/essentials/parser/pg_wrapper/postgresql/src/common/compression.c | 476 |
1 files changed, 476 insertions, 0 deletions
diff --git a/yql/essentials/parser/pg_wrapper/postgresql/src/common/compression.c b/yql/essentials/parser/pg_wrapper/postgresql/src/common/compression.c new file mode 100644 index 00000000000..47b18b8c600 --- /dev/null +++ b/yql/essentials/parser/pg_wrapper/postgresql/src/common/compression.c @@ -0,0 +1,476 @@ +/*------------------------------------------------------------------------- + * + * compression.c + * + * Shared code for compression methods and specifications. + * + * A compression specification specifies the parameters that should be used + * when performing compression with a specific algorithm. The simplest + * possible compression specification is an integer, which sets the + * compression level. + * + * Otherwise, a compression specification is a comma-separated list of items, + * each having the form keyword or keyword=value. + * + * Currently, the supported keywords are "level", "long", and "workers". + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/compression.c + *------------------------------------------------------------------------- + */ + +#ifndef FRONTEND +#include "postgres.h" +#else +#include "postgres_fe.h" +#endif + +#ifdef USE_ZSTD +#error #include <zstd.h> +#endif +#ifdef HAVE_LIBZ +#include <zlib.h> +#endif + +#include "common/compression.h" + +static int expect_integer_value(char *keyword, char *value, + pg_compress_specification *result); +static bool expect_boolean_value(char *keyword, char *value, + pg_compress_specification *result); + +/* + * Look up a compression algorithm by name. Returns true and sets *algorithm + * if the name is recognized. Otherwise returns false. + */ +bool +parse_compress_algorithm(char *name, pg_compress_algorithm *algorithm) +{ + if (strcmp(name, "none") == 0) + *algorithm = PG_COMPRESSION_NONE; + else if (strcmp(name, "gzip") == 0) + *algorithm = PG_COMPRESSION_GZIP; + else if (strcmp(name, "lz4") == 0) + *algorithm = PG_COMPRESSION_LZ4; + else if (strcmp(name, "zstd") == 0) + *algorithm = PG_COMPRESSION_ZSTD; + else + return false; + return true; +} + +/* + * Get the human-readable name corresponding to a particular compression + * algorithm. + */ +const char * +get_compress_algorithm_name(pg_compress_algorithm algorithm) +{ + switch (algorithm) + { + case PG_COMPRESSION_NONE: + return "none"; + case PG_COMPRESSION_GZIP: + return "gzip"; + case PG_COMPRESSION_LZ4: + return "lz4"; + case PG_COMPRESSION_ZSTD: + return "zstd"; + /* no default, to provoke compiler warnings if values are added */ + } + Assert(false); + return "???"; /* placate compiler */ +} + +/* + * Parse a compression specification for a specified algorithm. + * + * See the file header comments for a brief description of what a compression + * specification is expected to look like. + * + * On return, all fields of the result object will be initialized. + * In particular, result->parse_error will be NULL if no errors occurred + * during parsing, and will otherwise contain an appropriate error message. + * The caller may free this error message string using pfree, if desired. + * Note, however, even if there's no parse error, the string might not make + * sense: e.g. for gzip, level=12 is not sensible, but it does parse OK. + * + * The compression level is assigned by default if not directly specified + * by the specification. + * + * Use validate_compress_specification() to find out whether a compression + * specification is semantically sensible. + */ +void +parse_compress_specification(pg_compress_algorithm algorithm, char *specification, + pg_compress_specification *result) +{ + int bare_level; + char *bare_level_endp; + + /* Initial setup of result object. */ + result->algorithm = algorithm; + result->options = 0; + result->parse_error = NULL; + + /* + * Assign a default level depending on the compression method. This may + * be enforced later. + */ + switch (result->algorithm) + { + case PG_COMPRESSION_NONE: + result->level = 0; + break; + case PG_COMPRESSION_LZ4: +#ifdef USE_LZ4 + result->level = 0; /* fast compression mode */ +#else + result->parse_error = + psprintf(_("this build does not support compression with %s"), + "LZ4"); +#endif + break; + case PG_COMPRESSION_ZSTD: +#ifdef USE_ZSTD + result->level = ZSTD_CLEVEL_DEFAULT; +#else + result->parse_error = + psprintf(_("this build does not support compression with %s"), + "ZSTD"); +#endif + break; + case PG_COMPRESSION_GZIP: +#ifdef HAVE_LIBZ + result->level = Z_DEFAULT_COMPRESSION; +#else + result->parse_error = + psprintf(_("this build does not support compression with %s"), + "gzip"); +#endif + break; + } + + /* If there is no specification, we're done already. */ + if (specification == NULL) + return; + + /* As a special case, the specification can be a bare integer. */ + bare_level = strtol(specification, &bare_level_endp, 10); + if (specification != bare_level_endp && *bare_level_endp == '\0') + { + result->level = bare_level; + return; + } + + /* Look for comma-separated keyword or keyword=value entries. */ + while (1) + { + char *kwstart; + char *kwend; + char *vstart; + char *vend; + int kwlen; + int vlen; + bool has_value; + char *keyword; + char *value; + + /* Figure start, end, and length of next keyword and any value. */ + kwstart = kwend = specification; + while (*kwend != '\0' && *kwend != ',' && *kwend != '=') + ++kwend; + kwlen = kwend - kwstart; + if (*kwend != '=') + { + vstart = vend = NULL; + vlen = 0; + has_value = false; + } + else + { + vstart = vend = kwend + 1; + while (*vend != '\0' && *vend != ',') + ++vend; + vlen = vend - vstart; + has_value = true; + } + + /* Reject empty keyword. */ + if (kwlen == 0) + { + result->parse_error = + pstrdup(_("found empty string where a compression option was expected")); + break; + } + + /* Extract keyword and value as separate C strings. */ + keyword = palloc(kwlen + 1); + memcpy(keyword, kwstart, kwlen); + keyword[kwlen] = '\0'; + if (!has_value) + value = NULL; + else + { + value = palloc(vlen + 1); + memcpy(value, vstart, vlen); + value[vlen] = '\0'; + } + + /* Handle whatever keyword we found. */ + if (strcmp(keyword, "level") == 0) + { + result->level = expect_integer_value(keyword, value, result); + + /* + * No need to set a flag in "options", there is a default level + * set at least thanks to the logic above. + */ + } + else if (strcmp(keyword, "workers") == 0) + { + result->workers = expect_integer_value(keyword, value, result); + result->options |= PG_COMPRESSION_OPTION_WORKERS; + } + else if (strcmp(keyword, "long") == 0) + { + result->long_distance = expect_boolean_value(keyword, value, result); + result->options |= PG_COMPRESSION_OPTION_LONG_DISTANCE; + } + else + result->parse_error = + psprintf(_("unrecognized compression option: \"%s\""), keyword); + + /* Release memory, just to be tidy. */ + pfree(keyword); + if (value != NULL) + pfree(value); + + /* + * If we got an error or have reached the end of the string, stop. + * + * If there is no value, then the end of the keyword might have been + * the end of the string. If there is a value, then the end of the + * keyword cannot have been the end of the string, but the end of the + * value might have been. + */ + if (result->parse_error != NULL || + (vend == NULL ? *kwend == '\0' : *vend == '\0')) + break; + + /* Advance to next entry and loop around. */ + specification = vend == NULL ? kwend + 1 : vend + 1; + } +} + +/* + * Parse 'value' as an integer and return the result. + * + * If parsing fails, set result->parse_error to an appropriate message + * and return -1. + */ +static int +expect_integer_value(char *keyword, char *value, pg_compress_specification *result) +{ + int ivalue; + char *ivalue_endp; + + if (value == NULL) + { + result->parse_error = + psprintf(_("compression option \"%s\" requires a value"), + keyword); + return -1; + } + + ivalue = strtol(value, &ivalue_endp, 10); + if (ivalue_endp == value || *ivalue_endp != '\0') + { + result->parse_error = + psprintf(_("value for compression option \"%s\" must be an integer"), + keyword); + return -1; + } + return ivalue; +} + +/* + * Parse 'value' as a boolean and return the result. + * + * If parsing fails, set result->parse_error to an appropriate message + * and return -1. The caller must check result->parse_error to determine if + * the call was successful. + * + * Valid values are: yes, no, on, off, 1, 0. + * + * Inspired by ParseVariableBool(). + */ +static bool +expect_boolean_value(char *keyword, char *value, pg_compress_specification *result) +{ + if (value == NULL) + return true; + + if (pg_strcasecmp(value, "yes") == 0) + return true; + if (pg_strcasecmp(value, "on") == 0) + return true; + if (pg_strcasecmp(value, "1") == 0) + return true; + + if (pg_strcasecmp(value, "no") == 0) + return false; + if (pg_strcasecmp(value, "off") == 0) + return false; + if (pg_strcasecmp(value, "0") == 0) + return false; + + result->parse_error = + psprintf(_("value for compression option \"%s\" must be a Boolean value"), + keyword); + return false; +} + +/* + * Returns NULL if the compression specification string was syntactically + * valid and semantically sensible. Otherwise, returns an error message. + * + * Does not test whether this build of PostgreSQL supports the requested + * compression method. + */ +char * +validate_compress_specification(pg_compress_specification *spec) +{ + int min_level = 1; + int max_level = 1; + int default_level = 0; + + /* If it didn't even parse OK, it's definitely no good. */ + if (spec->parse_error != NULL) + return spec->parse_error; + + /* + * Check that the algorithm expects a compression level and it is within + * the legal range for the algorithm. + */ + switch (spec->algorithm) + { + case PG_COMPRESSION_GZIP: + max_level = 9; +#ifdef HAVE_LIBZ + default_level = Z_DEFAULT_COMPRESSION; +#endif + break; + case PG_COMPRESSION_LZ4: + max_level = 12; + default_level = 0; /* fast mode */ + break; + case PG_COMPRESSION_ZSTD: +#ifdef USE_ZSTD + max_level = ZSTD_maxCLevel(); + min_level = ZSTD_minCLevel(); + default_level = ZSTD_CLEVEL_DEFAULT; +#endif + break; + case PG_COMPRESSION_NONE: + if (spec->level != 0) + return psprintf(_("compression algorithm \"%s\" does not accept a compression level"), + get_compress_algorithm_name(spec->algorithm)); + break; + } + + if ((spec->level < min_level || spec->level > max_level) && + spec->level != default_level) + return psprintf(_("compression algorithm \"%s\" expects a compression level between %d and %d (default at %d)"), + get_compress_algorithm_name(spec->algorithm), + min_level, max_level, default_level); + + /* + * Of the compression algorithms that we currently support, only zstd + * allows parallel workers. + */ + if ((spec->options & PG_COMPRESSION_OPTION_WORKERS) != 0 && + (spec->algorithm != PG_COMPRESSION_ZSTD)) + { + return psprintf(_("compression algorithm \"%s\" does not accept a worker count"), + get_compress_algorithm_name(spec->algorithm)); + } + + /* + * Of the compression algorithms that we currently support, only zstd + * supports long-distance mode. + */ + if ((spec->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0 && + (spec->algorithm != PG_COMPRESSION_ZSTD)) + { + return psprintf(_("compression algorithm \"%s\" does not support long-distance mode"), + get_compress_algorithm_name(spec->algorithm)); + } + + return NULL; +} + +#ifdef FRONTEND + +/* + * Basic parsing of a value specified through a command-line option, commonly + * -Z/--compress. + * + * The parsing consists of a METHOD:DETAIL string fed later to + * parse_compress_specification(). This only extracts METHOD and DETAIL. + * If only an integer is found, the method is implied by the value specified. + */ +void +parse_compress_options(const char *option, char **algorithm, char **detail) +{ + char *sep; + char *endp; + long result; + + /* + * Check whether the compression specification consists of a bare integer. + * + * For backward-compatibility, assume "none" if the integer found is zero + * and "gzip" otherwise. + */ + result = strtol(option, &endp, 10); + if (*endp == '\0') + { + if (result == 0) + { + *algorithm = pstrdup("none"); + *detail = NULL; + } + else + { + *algorithm = pstrdup("gzip"); + *detail = pstrdup(option); + } + return; + } + + /* + * Check whether there is a compression detail following the algorithm + * name. + */ + sep = strchr(option, ':'); + if (sep == NULL) + { + *algorithm = pstrdup(option); + *detail = NULL; + } + else + { + char *alg; + + alg = palloc((sep - option) + 1); + memcpy(alg, option, sep - option); + alg[sep - option] = '\0'; + + *algorithm = alg; + *detail = pstrdup(sep + 1); + } +} +#endif /* FRONTEND */ |