diff options
author | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-14 09:58:56 +0300 |
---|---|---|
committer | vitalyisaev <vitalyisaev@ydb.tech> | 2023-11-14 10:20:20 +0300 |
commit | c2b2dfd9827a400a8495e172a56343462e3ceb82 (patch) | |
tree | cd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/python/zstandard/py2/c-ext/compressionparams.c | |
parent | d4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff) | |
download | ydb-c2b2dfd9827a400a8495e172a56343462e3ceb82.tar.gz |
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/python/zstandard/py2/c-ext/compressionparams.c')
-rw-r--r-- | contrib/python/zstandard/py2/c-ext/compressionparams.c | 572 |
1 files changed, 572 insertions, 0 deletions
diff --git a/contrib/python/zstandard/py2/c-ext/compressionparams.c b/contrib/python/zstandard/py2/c-ext/compressionparams.c new file mode 100644 index 0000000000..e5e8c55fea --- /dev/null +++ b/contrib/python/zstandard/py2/c-ext/compressionparams.c @@ -0,0 +1,572 @@ +/** +* Copyright (c) 2016-present, Gregory Szorc +* All rights reserved. +* +* This software may be modified and distributed under the terms +* of the BSD license. See the LICENSE file for details. +*/ + +#include "python-zstandard.h" + +extern PyObject* ZstdError; + +int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value) { + size_t zresult = ZSTD_CCtxParams_setParameter(params, param, value); + if (ZSTD_isError(zresult)) { + PyErr_Format(ZstdError, "unable to set compression context parameter: %s", + ZSTD_getErrorName(zresult)); + return 1; + } + + return 0; +} + +#define TRY_SET_PARAMETER(params, param, value) if (set_parameter(params, param, value)) return -1; + +#define TRY_COPY_PARAMETER(source, dest, param) { \ + int result; \ + size_t zresult = ZSTD_CCtxParams_getParameter(source, param, &result); \ + if (ZSTD_isError(zresult)) { \ + return 1; \ + } \ + zresult = ZSTD_CCtxParams_setParameter(dest, param, result); \ + if (ZSTD_isError(zresult)) { \ + return 1; \ + } \ +} + +int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) { + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_nbWorkers); + + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_format); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_compressionLevel); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_windowLog); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_hashLog); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_chainLog); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_searchLog); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_minMatch); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_targetLength); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_strategy); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_contentSizeFlag); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_checksumFlag); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_dictIDFlag); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_jobSize); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_overlapLog); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_forceMaxWindow); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_enableLongDistanceMatching); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashLog); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmMinMatch); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmBucketSizeLog); + TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashRateLog); + + return 0; +} + +int reset_params(ZstdCompressionParametersObject* params) { + if (params->params) { + ZSTD_CCtxParams_reset(params->params); + } + else { + params->params = ZSTD_createCCtxParams(); + if (!params->params) { + PyErr_NoMemory(); + return 1; + } + } + + return set_parameters(params->params, params); +} + +#define TRY_GET_PARAMETER(params, param, value) { \ + size_t zresult = ZSTD_CCtxParams_getParameter(params, param, value); \ + if (ZSTD_isError(zresult)) { \ + PyErr_Format(ZstdError, "unable to retrieve parameter: %s", ZSTD_getErrorName(zresult)); \ + return 1; \ + } \ +} + +int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams) { + int value; + + TRY_GET_PARAMETER(params->params, ZSTD_c_windowLog, &value); + cparams->windowLog = value; + + TRY_GET_PARAMETER(params->params, ZSTD_c_chainLog, &value); + cparams->chainLog = value; + + TRY_GET_PARAMETER(params->params, ZSTD_c_hashLog, &value); + cparams->hashLog = value; + + TRY_GET_PARAMETER(params->params, ZSTD_c_searchLog, &value); + cparams->searchLog = value; + + TRY_GET_PARAMETER(params->params, ZSTD_c_minMatch, &value); + cparams->minMatch = value; + + TRY_GET_PARAMETER(params->params, ZSTD_c_targetLength, &value); + cparams->targetLength = value; + + TRY_GET_PARAMETER(params->params, ZSTD_c_strategy, &value); + cparams->strategy = value; + + return 0; +} + +static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) { + static char* kwlist[] = { + "format", + "compression_level", + "window_log", + "hash_log", + "chain_log", + "search_log", + "min_match", + "target_length", + "compression_strategy", + "strategy", + "write_content_size", + "write_checksum", + "write_dict_id", + "job_size", + "overlap_log", + "overlap_size_log", + "force_max_window", + "enable_ldm", + "ldm_hash_log", + "ldm_min_match", + "ldm_bucket_size_log", + "ldm_hash_rate_log", + "ldm_hash_every_log", + "threads", + NULL + }; + + int format = 0; + int compressionLevel = 0; + int windowLog = 0; + int hashLog = 0; + int chainLog = 0; + int searchLog = 0; + int minMatch = 0; + int targetLength = 0; + int compressionStrategy = -1; + int strategy = -1; + int contentSizeFlag = 1; + int checksumFlag = 0; + int dictIDFlag = 0; + int jobSize = 0; + int overlapLog = -1; + int overlapSizeLog = -1; + int forceMaxWindow = 0; + int enableLDM = 0; + int ldmHashLog = 0; + int ldmMinMatch = 0; + int ldmBucketSizeLog = 0; + int ldmHashRateLog = -1; + int ldmHashEveryLog = -1; + int threads = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, + "|iiiiiiiiiiiiiiiiiiiiiiii:CompressionParameters", + kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog, + &searchLog, &minMatch, &targetLength, &compressionStrategy, &strategy, + &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapLog, + &overlapSizeLog, &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch, + &ldmBucketSizeLog, &ldmHashRateLog, &ldmHashEveryLog, &threads)) { + return -1; + } + + if (reset_params(self)) { + return -1; + } + + if (threads < 0) { + threads = cpu_count(); + } + + /* We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and ZSTD_c_overlapLog + * because setting ZSTD_c_nbWorkers resets the other parameters. */ + TRY_SET_PARAMETER(self->params, ZSTD_c_nbWorkers, threads); + + TRY_SET_PARAMETER(self->params, ZSTD_c_format, format); + TRY_SET_PARAMETER(self->params, ZSTD_c_compressionLevel, compressionLevel); + TRY_SET_PARAMETER(self->params, ZSTD_c_windowLog, windowLog); + TRY_SET_PARAMETER(self->params, ZSTD_c_hashLog, hashLog); + TRY_SET_PARAMETER(self->params, ZSTD_c_chainLog, chainLog); + TRY_SET_PARAMETER(self->params, ZSTD_c_searchLog, searchLog); + TRY_SET_PARAMETER(self->params, ZSTD_c_minMatch, minMatch); + TRY_SET_PARAMETER(self->params, ZSTD_c_targetLength, targetLength); + + if (compressionStrategy != -1 && strategy != -1) { + PyErr_SetString(PyExc_ValueError, "cannot specify both compression_strategy and strategy"); + return -1; + } + + if (compressionStrategy != -1) { + strategy = compressionStrategy; + } + else if (strategy == -1) { + strategy = 0; + } + + TRY_SET_PARAMETER(self->params, ZSTD_c_strategy, strategy); + TRY_SET_PARAMETER(self->params, ZSTD_c_contentSizeFlag, contentSizeFlag); + TRY_SET_PARAMETER(self->params, ZSTD_c_checksumFlag, checksumFlag); + TRY_SET_PARAMETER(self->params, ZSTD_c_dictIDFlag, dictIDFlag); + TRY_SET_PARAMETER(self->params, ZSTD_c_jobSize, jobSize); + + if (overlapLog != -1 && overlapSizeLog != -1) { + PyErr_SetString(PyExc_ValueError, "cannot specify both overlap_log and overlap_size_log"); + return -1; + } + + if (overlapSizeLog != -1) { + overlapLog = overlapSizeLog; + } + else if (overlapLog == -1) { + overlapLog = 0; + } + + TRY_SET_PARAMETER(self->params, ZSTD_c_overlapLog, overlapLog); + TRY_SET_PARAMETER(self->params, ZSTD_c_forceMaxWindow, forceMaxWindow); + TRY_SET_PARAMETER(self->params, ZSTD_c_enableLongDistanceMatching, enableLDM); + TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashLog, ldmHashLog); + TRY_SET_PARAMETER(self->params, ZSTD_c_ldmMinMatch, ldmMinMatch); + TRY_SET_PARAMETER(self->params, ZSTD_c_ldmBucketSizeLog, ldmBucketSizeLog); + + if (ldmHashRateLog != -1 && ldmHashEveryLog != -1) { + PyErr_SetString(PyExc_ValueError, "cannot specify both ldm_hash_rate_log and ldm_hash_everyLog"); + return -1; + } + + if (ldmHashEveryLog != -1) { + ldmHashRateLog = ldmHashEveryLog; + } + else if (ldmHashRateLog == -1) { + ldmHashRateLog = 0; + } + + TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashRateLog, ldmHashRateLog); + + return 0; +} + +PyDoc_STRVAR(ZstdCompressionParameters_from_level__doc__, +"Create a CompressionParameters from a compression level and target sizes\n" +); + +ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) { + int managedKwargs = 0; + int level; + PyObject* sourceSize = NULL; + PyObject* dictSize = NULL; + unsigned PY_LONG_LONG iSourceSize = 0; + Py_ssize_t iDictSize = 0; + PyObject* val; + ZSTD_compressionParameters params; + ZstdCompressionParametersObject* result = NULL; + int res; + + if (!PyArg_ParseTuple(args, "i:from_level", + &level)) { + return NULL; + } + + if (!kwargs) { + kwargs = PyDict_New(); + if (!kwargs) { + return NULL; + } + managedKwargs = 1; + } + + sourceSize = PyDict_GetItemString(kwargs, "source_size"); + if (sourceSize) { +#if PY_MAJOR_VERSION >= 3 + iSourceSize = PyLong_AsUnsignedLongLong(sourceSize); + if (iSourceSize == (unsigned PY_LONG_LONG)(-1)) { + goto cleanup; + } +#else + iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize); +#endif + + PyDict_DelItemString(kwargs, "source_size"); + } + + dictSize = PyDict_GetItemString(kwargs, "dict_size"); + if (dictSize) { +#if PY_MAJOR_VERSION >= 3 + iDictSize = PyLong_AsSsize_t(dictSize); +#else + iDictSize = PyInt_AsSsize_t(dictSize); +#endif + if (iDictSize == -1) { + goto cleanup; + } + + PyDict_DelItemString(kwargs, "dict_size"); + } + + + params = ZSTD_getCParams(level, iSourceSize, iDictSize); + + /* Values derived from the input level and sizes are passed along to the + constructor. But only if a value doesn't already exist. */ + val = PyDict_GetItemString(kwargs, "window_log"); + if (!val) { + val = PyLong_FromUnsignedLong(params.windowLog); + if (!val) { + goto cleanup; + } + PyDict_SetItemString(kwargs, "window_log", val); + Py_DECREF(val); + } + + val = PyDict_GetItemString(kwargs, "chain_log"); + if (!val) { + val = PyLong_FromUnsignedLong(params.chainLog); + if (!val) { + goto cleanup; + } + PyDict_SetItemString(kwargs, "chain_log", val); + Py_DECREF(val); + } + + val = PyDict_GetItemString(kwargs, "hash_log"); + if (!val) { + val = PyLong_FromUnsignedLong(params.hashLog); + if (!val) { + goto cleanup; + } + PyDict_SetItemString(kwargs, "hash_log", val); + Py_DECREF(val); + } + + val = PyDict_GetItemString(kwargs, "search_log"); + if (!val) { + val = PyLong_FromUnsignedLong(params.searchLog); + if (!val) { + goto cleanup; + } + PyDict_SetItemString(kwargs, "search_log", val); + Py_DECREF(val); + } + + val = PyDict_GetItemString(kwargs, "min_match"); + if (!val) { + val = PyLong_FromUnsignedLong(params.minMatch); + if (!val) { + goto cleanup; + } + PyDict_SetItemString(kwargs, "min_match", val); + Py_DECREF(val); + } + + val = PyDict_GetItemString(kwargs, "target_length"); + if (!val) { + val = PyLong_FromUnsignedLong(params.targetLength); + if (!val) { + goto cleanup; + } + PyDict_SetItemString(kwargs, "target_length", val); + Py_DECREF(val); + } + + val = PyDict_GetItemString(kwargs, "compression_strategy"); + if (!val) { + val = PyLong_FromUnsignedLong(params.strategy); + if (!val) { + goto cleanup; + } + PyDict_SetItemString(kwargs, "compression_strategy", val); + Py_DECREF(val); + } + + result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType); + if (!result) { + goto cleanup; + } + + result->params = NULL; + + val = PyTuple_New(0); + if (!val) { + Py_CLEAR(result); + goto cleanup; + } + + res = ZstdCompressionParameters_init(result, val, kwargs); + Py_DECREF(val); + + if (res) { + Py_CLEAR(result); + goto cleanup; + } + +cleanup: + if (managedKwargs) { + Py_DECREF(kwargs); + } + + return result; +} + +PyDoc_STRVAR(ZstdCompressionParameters_estimated_compression_context_size__doc__, +"Estimate the size in bytes of a compression context for compression parameters\n" +); + +PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) { + return PyLong_FromSize_t(ZSTD_estimateCCtxSize_usingCCtxParams(self->params)); +} + +PyDoc_STRVAR(ZstdCompressionParameters__doc__, +"ZstdCompressionParameters: low-level control over zstd compression"); + +static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) { + if (self->params) { + ZSTD_freeCCtxParams(self->params); + self->params = NULL; + } + + PyObject_Del(self); +} + +#define PARAM_GETTER(name, param) PyObject* ZstdCompressionParameters_get_##name(PyObject* self, void* unused) { \ + int result; \ + size_t zresult; \ + ZstdCompressionParametersObject* p = (ZstdCompressionParametersObject*)(self); \ + zresult = ZSTD_CCtxParams_getParameter(p->params, param, &result); \ + if (ZSTD_isError(zresult)) { \ + PyErr_Format(ZstdError, "unable to get compression parameter: %s", \ + ZSTD_getErrorName(zresult)); \ + return NULL; \ + } \ + return PyLong_FromLong(result); \ +} + +PARAM_GETTER(format, ZSTD_c_format) +PARAM_GETTER(compression_level, ZSTD_c_compressionLevel) +PARAM_GETTER(window_log, ZSTD_c_windowLog) +PARAM_GETTER(hash_log, ZSTD_c_hashLog) +PARAM_GETTER(chain_log, ZSTD_c_chainLog) +PARAM_GETTER(search_log, ZSTD_c_searchLog) +PARAM_GETTER(min_match, ZSTD_c_minMatch) +PARAM_GETTER(target_length, ZSTD_c_targetLength) +PARAM_GETTER(compression_strategy, ZSTD_c_strategy) +PARAM_GETTER(write_content_size, ZSTD_c_contentSizeFlag) +PARAM_GETTER(write_checksum, ZSTD_c_checksumFlag) +PARAM_GETTER(write_dict_id, ZSTD_c_dictIDFlag) +PARAM_GETTER(job_size, ZSTD_c_jobSize) +PARAM_GETTER(overlap_log, ZSTD_c_overlapLog) +PARAM_GETTER(force_max_window, ZSTD_c_forceMaxWindow) +PARAM_GETTER(enable_ldm, ZSTD_c_enableLongDistanceMatching) +PARAM_GETTER(ldm_hash_log, ZSTD_c_ldmHashLog) +PARAM_GETTER(ldm_min_match, ZSTD_c_ldmMinMatch) +PARAM_GETTER(ldm_bucket_size_log, ZSTD_c_ldmBucketSizeLog) +PARAM_GETTER(ldm_hash_rate_log, ZSTD_c_ldmHashRateLog) +PARAM_GETTER(threads, ZSTD_c_nbWorkers) + +static PyMethodDef ZstdCompressionParameters_methods[] = { + { + "from_level", + (PyCFunction)CompressionParameters_from_level, + METH_VARARGS | METH_KEYWORDS | METH_STATIC, + ZstdCompressionParameters_from_level__doc__ + }, + { + "estimated_compression_context_size", + (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size, + METH_NOARGS, + ZstdCompressionParameters_estimated_compression_context_size__doc__ + }, + { NULL, NULL } +}; + +#define GET_SET_ENTRY(name) { #name, ZstdCompressionParameters_get_##name, NULL, NULL, NULL } + +static PyGetSetDef ZstdCompressionParameters_getset[] = { + GET_SET_ENTRY(format), + GET_SET_ENTRY(compression_level), + GET_SET_ENTRY(window_log), + GET_SET_ENTRY(hash_log), + GET_SET_ENTRY(chain_log), + GET_SET_ENTRY(search_log), + GET_SET_ENTRY(min_match), + GET_SET_ENTRY(target_length), + GET_SET_ENTRY(compression_strategy), + GET_SET_ENTRY(write_content_size), + GET_SET_ENTRY(write_checksum), + GET_SET_ENTRY(write_dict_id), + GET_SET_ENTRY(threads), + GET_SET_ENTRY(job_size), + GET_SET_ENTRY(overlap_log), + /* TODO remove this deprecated attribute */ + { "overlap_size_log", ZstdCompressionParameters_get_overlap_log, NULL, NULL, NULL }, + GET_SET_ENTRY(force_max_window), + GET_SET_ENTRY(enable_ldm), + GET_SET_ENTRY(ldm_hash_log), + GET_SET_ENTRY(ldm_min_match), + GET_SET_ENTRY(ldm_bucket_size_log), + GET_SET_ENTRY(ldm_hash_rate_log), + /* TODO remove this deprecated attribute */ + { "ldm_hash_every_log", ZstdCompressionParameters_get_ldm_hash_rate_log, NULL, NULL, NULL }, + { NULL } +}; + +PyTypeObject ZstdCompressionParametersType = { + PyVarObject_HEAD_INIT(NULL, 0) + "ZstdCompressionParameters", /* tp_name */ + sizeof(ZstdCompressionParametersObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ + ZstdCompressionParameters__doc__, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + ZstdCompressionParameters_methods, /* tp_methods */ + 0, /* tp_members */ + ZstdCompressionParameters_getset, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)ZstdCompressionParameters_init, /* tp_init */ + 0, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ +}; + +void compressionparams_module_init(PyObject* mod) { + Py_TYPE(&ZstdCompressionParametersType) = &PyType_Type; + if (PyType_Ready(&ZstdCompressionParametersType) < 0) { + return; + } + + Py_INCREF(&ZstdCompressionParametersType); + PyModule_AddObject(mod, "ZstdCompressionParameters", + (PyObject*)&ZstdCompressionParametersType); + + /* TODO remove deprecated alias. */ + Py_INCREF(&ZstdCompressionParametersType); + PyModule_AddObject(mod, "CompressionParameters", + (PyObject*)&ZstdCompressionParametersType); +} |