summaryrefslogtreecommitdiffstats
path: root/library/python/codecs/__codecs.pyx
blob: de8741e4e84051dc90d460d1c57a5704db29eeb0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import six

from libcpp cimport bool

from util.generic.string cimport TString, TStringBuf


def to_bytes(s):
    """Return `s` as UTF-8 encoded bytes when it is a text string.

    Byte strings are returned as-is. Any other object that has no
    ``encode`` method (for example ``None``) is also returned unchanged.
    """
    # Byte strings must short-circuit: on Python 2 `str` *is* bytes and still
    # has `.encode`, which first decodes implicitly as ASCII and therefore
    # raises UnicodeDecodeError for any non-ASCII payload.
    if isinstance(s, bytes):
        return s

    try:
        return s.encode('utf-8')
    except AttributeError:
        # Not a string-like object; hand it back untouched.
        return s


def from_bytes(s):
    """Decode `s` from UTF-8 on Python 3; return it unchanged on Python 2."""
    return s.decode('utf-8') if six.PY3 else s


# C++ declarations taken from the NBlockCodecs namespace of
# library/cpp/blockcodecs/codecs.h. Every entry is declared `except +`, so a
# C++ exception thrown by the call is translated by Cython into a Python
# exception instead of terminating the process.
cdef extern from "library/cpp/blockcodecs/codecs.h" namespace "NBlockCodecs":
    # Codec interface. Both methods take the input as a TStringBuf and a
    # TString reference `res` for the output, and are declared `nogil` so
    # they can be called inside a `with nogil:` section.
    cdef cppclass ICodec:
        void Encode(TStringBuf data, TString& res) nogil except +
        void Decode(TStringBuf data, TString& res) nogil except +

    # Looks up a codec by name.
    # NOTE(review): behaviour for an unknown name is defined on the C++ side
    # (presumably it throws) -- confirm against codecs.h.
    cdef const ICodec* Codec(const TStringBuf& name) except +
    # Returns the codec names as a single string; list_all_codecs() in this
    # file splits it on ','.
    cdef TString ListAllCodecsAsString() except +


def dumps(name, data):
    """Encode `data` with the codec registered under `name`.

    `name` may be text or bytes (it is passed through `to_bytes`); `data`
    must be a byte string, since it is handed to C++ as a raw char buffer.
    Returns the encoded payload copied out of the C++ result string.
    C++ exceptions from the codec lookup or from encoding surface as Python
    exceptions.
    """
    cdef const ICodec* encoder
    cdef TString encoded
    cdef TStringBuf payload

    codec_name = to_bytes(name)
    encoder = Codec(TStringBuf(codec_name, len(codec_name)))
    payload = TStringBuf(data, len(data))

    # The codec only touches C++ objects, so the GIL is released while it runs.
    with nogil:
        encoder.Encode(payload, encoded)

    # Slice by explicit length so embedded NUL bytes are preserved.
    return encoded.c_str()[:encoded.length()]


def loads(name, data):
    """Decode `data` with the codec registered under `name`.

    `name` may be text or bytes (it is passed through `to_bytes`); `data`
    must be a byte string, since it is handed to C++ as a raw char buffer.
    Returns the decoded payload copied out of the C++ result string.
    C++ exceptions from the codec lookup or from decoding surface as Python
    exceptions.
    """
    cdef const ICodec* decoder
    cdef TString decoded
    cdef TStringBuf payload

    codec_name = to_bytes(name)
    decoder = Codec(TStringBuf(codec_name, len(codec_name)))
    payload = TStringBuf(data, len(data))

    # The codec only touches C++ objects, so the GIL is released while it runs.
    with nogil:
        decoder.Decode(payload, decoded)

    # Slice by explicit length so embedded NUL bytes are preserved.
    return decoded.c_str()[:decoded.length()]


# Numeric ids for the codec names known to this module.
_CODEC_IDS = {
    "lz4": 6051,
    "snappy": 50986,
    "std08_1": 55019,
    "std08_3": 23308,
    "std08_7": 33533,
    "brotli_1": 48947,
    "brotli_10": 43475,
    "brotli_11": 7241,
    "brotli_2": 63895,
    "brotli_3": 11408,
    "brotli_4": 47136,
    "brotli_5": 45284,
    "brotli_6": 63219,
    "brotli_7": 59675,
    "brotli_8": 40233,
    "brotli_9": 10380,
}


def get_codec_id(name):
    """Return the numeric id associated with the codec called `name`.

    Raises:
        RuntimeError: if `name` is not one of the known codec names. This
            includes values that are not strings at all (``None``, numbers,
            unhashable objects), which previously escaped as ``TypeError``
            from the message concatenation.
    """
    try:
        codec_id = _CODEC_IDS.get(name)
    except TypeError:
        # Unhashable argument: it cannot be a known codec name.
        codec_id = None

    if codec_id is None:
        # %-formatting with a 1-tuple works for any object, unlike `+`.
        raise RuntimeError("Unknown code name: %s" % (name,))

    return codec_id


def list_all_codecs():
    """Return the codec names reported by the C++ library as a list.

    The library returns one comma-separated string; it is copied into a
    Python byte string, passed through `from_bytes`, and split on ','.
    """
    cdef TString joined = ListAllCodecsAsString()

    raw_names = joined.c_str()[:joined.length()]
    names = from_bytes(raw_names)
    return names.split(',')