diff options
author | robot-piglet <[email protected]> | 2025-08-28 14:27:58 +0300 |
---|---|---|
committer | robot-piglet <[email protected]> | 2025-08-28 14:57:06 +0300 |
commit | 81d828c32c8d5477cb2f0ce5da06a1a8d9392ca3 (patch) | |
tree | 3081d566f0d5158d76e9093261344f6406fd09f7 /library/python/codecs/__codecs.pyx | |
parent | 77ea11423f959e51795cc3ef36a48d808b4ffb98 (diff) |
Intermediate changes
commit_hash:d5b1af16dbe9030537a04c27eb410c88c2f496cd
Diffstat (limited to 'library/python/codecs/__codecs.pyx')
-rw-r--r-- | library/python/codecs/__codecs.pyx | 99 |
1 files changed, 99 insertions, 0 deletions
# Cython bindings over the NBlockCodecs C++ library
# (library/cpp/blockcodecs/codecs.h).

import six

from libcpp cimport bool

from util.generic.string cimport TString, TStringBuf


def to_bytes(s):
    """Return ``s`` encoded as UTF-8 when it has an ``encode`` method.

    Objects without ``encode`` (an AttributeError is raised on lookup)
    are returned unchanged.
    """
    try:
        return s.encode('utf-8')
    except AttributeError:
        # No .encode(): hand the object back untouched.
        return s


def from_bytes(s):
    """Decode ``s`` from UTF-8 on Python 3; return it unchanged on Python 2."""
    if six.PY3:
        return s.decode('utf-8')

    return s


cdef extern from "library/cpp/blockcodecs/codecs.h" namespace "NBlockCodecs":
    cdef cppclass ICodec:
        void Encode(TStringBuf data, TString& res) nogil except +
        void Decode(TStringBuf data, TString& res) nogil except +

    cdef const ICodec* Codec(const TStringBuf& name) except +
    cdef TString ListAllCodecsAsString() except +


def dumps(name, data):
    """Encode ``data`` with the codec called ``name`` and return the result.

    ``name`` is passed through ``to_bytes`` first, so text names are accepted.
    ``data`` is handed to the codec as a raw buffer of ``len(data)`` bytes
    (presumably a ``bytes`` object -- confirm against callers).

    The GIL is released while the codec runs. C++ exceptions raised by the
    codec lookup or by ``Encode`` surface as Python exceptions (``except +``).
    """
    name = to_bytes(name)

    cdef const ICodec* codec = Codec(TStringBuf(name, len(name)))
    cdef TString res
    cdef TStringBuf cdata = TStringBuf(data, len(data))

    with nogil:
        codec.Encode(cdata, res)

    # Slice by explicit length so embedded NUL bytes are preserved.
    return res.c_str()[:res.length()]


def loads(name, data):
    """Decode ``data`` with the codec called ``name`` and return the result.

    ``name`` is passed through ``to_bytes`` first, so text names are accepted.
    ``data`` is handed to the codec as a raw buffer of ``len(data)`` bytes
    (presumably a ``bytes`` object -- confirm against callers).

    The GIL is released while the codec runs. C++ exceptions raised by the
    codec lookup or by ``Decode`` surface as Python exceptions (``except +``).
    """
    name = to_bytes(name)

    cdef const ICodec* codec = Codec(TStringBuf(name, len(name)))
    cdef TString res
    cdef TStringBuf cdata = TStringBuf(data, len(data))

    with nogil:
        codec.Decode(cdata, res)

    # Slice by explicit length so embedded NUL bytes are preserved.
    return res.c_str()[:res.length()]


# Hard-coded codec name -> integer id table used by get_codec_id().
_CODEC_IDS = {
    "lz4": 6051,
    "snappy": 50986,
    "std08_1": 55019,
    "std08_3": 23308,
    "std08_7": 33533,
    "brotli_1": 48947,
    "brotli_10": 43475,
    "brotli_11": 7241,
    "brotli_2": 63895,
    "brotli_3": 11408,
    "brotli_4": 47136,
    "brotli_5": 45284,
    "brotli_6": 63219,
    "brotli_7": 59675,
    "brotli_8": 40233,
    "brotli_9": 10380,
}


def get_codec_id(name):
    """Return the integer id registered for the codec called ``name``.

    Raises:
        RuntimeError: if ``name`` is not one of the known codec names.
    """
    try:
        return _CODEC_IDS[name]
    except (KeyError, TypeError):
        # KeyError: unknown name. TypeError: unhashable name.
        # Both are reported below as the same "unknown" error.
        pass

    # %-formatting instead of "+" so a non-str name (e.g. bytes on Python 3)
    # still produces the intended RuntimeError rather than a TypeError.
    raise RuntimeError("Unknown code name: %s" % (name,))


def list_all_codecs():
    """Return the codec names reported by ``ListAllCodecsAsString()``.

    The C++ call returns one comma-separated string; it is converted with
    ``from_bytes`` and split on ``','``.
    """
    cdef TString res = ListAllCodecsAsString()

    return from_bytes(res.c_str()[:res.length()]).split(',')