aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp
diff options
context:
space:
mode:
authorvitalyisaev <vitalyisaev@ydb.tech>2023-11-30 13:26:22 +0300
committervitalyisaev <vitalyisaev@ydb.tech>2023-11-30 15:44:45 +0300
commit0a98fece5a9b54f16afeb3a94b3eb3105e9c3962 (patch)
tree291d72dbd7e9865399f668c84d11ed86fb190bbf /library/cpp
parentcb2c8d75065e5b3c47094067cb4aa407d4813298 (diff)
downloadydb-0a98fece5a9b54f16afeb3a94b3eb3105e9c3962.tar.gz
YQ Connector:Use docker-compose in integrational tests
Diffstat (limited to 'library/cpp')
-rw-r--r--library/cpp/deprecated/autoarray/README.md3
-rw-r--r--library/cpp/deprecated/autoarray/autoarray.cpp1
-rw-r--r--library/cpp/deprecated/autoarray/autoarray.h264
-rw-r--r--library/cpp/deprecated/autoarray/ya.make7
-rw-r--r--library/cpp/deprecated/fgood/README.md15
-rw-r--r--library/cpp/deprecated/fgood/ffb.cpp407
-rw-r--r--library/cpp/deprecated/fgood/ffb.h264
-rw-r--r--library/cpp/deprecated/fgood/fgood.cpp70
-rw-r--r--library/cpp/deprecated/fgood/fgood.h328
-rw-r--r--library/cpp/deprecated/fgood/fput.h79
-rw-r--r--library/cpp/deprecated/fgood/ya.make8
-rw-r--r--library/cpp/deprecated/mapped_file/mapped_file.cpp64
-rw-r--r--library/cpp/deprecated/mapped_file/ya.make7
-rw-r--r--library/cpp/eventlog/common.h10
-rw-r--r--library/cpp/eventlog/evdecoder.cpp112
-rw-r--r--library/cpp/eventlog/evdecoder.h16
-rw-r--r--library/cpp/eventlog/event_field_output.cpp68
-rw-r--r--library/cpp/eventlog/event_field_output.h29
-rw-r--r--library/cpp/eventlog/event_field_printer.cpp27
-rw-r--r--library/cpp/eventlog/event_field_printer.h38
-rw-r--r--library/cpp/eventlog/eventlog.cpp554
-rw-r--r--library/cpp/eventlog/eventlog.h623
-rw-r--r--library/cpp/eventlog/eventlog_int.cpp12
-rw-r--r--library/cpp/eventlog/eventlog_int.h72
-rw-r--r--library/cpp/eventlog/events_extension.h161
-rw-r--r--library/cpp/eventlog/iterator.cpp88
-rw-r--r--library/cpp/eventlog/iterator.h51
-rw-r--r--library/cpp/eventlog/logparser.cpp814
-rw-r--r--library/cpp/eventlog/logparser.h343
-rw-r--r--library/cpp/eventlog/proto/events_extension.proto22
-rw-r--r--library/cpp/eventlog/proto/internal.proto9
-rw-r--r--library/cpp/eventlog/proto/ya.make12
-rw-r--r--library/cpp/eventlog/threaded_eventlog.cpp1
-rw-r--r--library/cpp/eventlog/threaded_eventlog.h154
-rw-r--r--library/cpp/eventlog/ya.make29
-rw-r--r--library/cpp/fieldcalc/field_calc.cpp1136
-rw-r--r--library/cpp/fieldcalc/field_calc.h136
-rw-r--r--library/cpp/fieldcalc/field_calc_int.h593
-rw-r--r--library/cpp/fieldcalc/lossy_types.h52
-rw-r--r--library/cpp/fieldcalc/ya.make13
-rw-r--r--library/cpp/malloc/galloc/malloc-info.cpp9
-rw-r--r--library/cpp/malloc/galloc/ya.make15
-rw-r--r--library/cpp/on_disk/multi_blob/multiblob.cpp67
-rw-r--r--library/cpp/on_disk/multi_blob/multiblob.h77
-rw-r--r--library/cpp/on_disk/multi_blob/multiblob_builder.cpp146
-rw-r--r--library/cpp/on_disk/multi_blob/multiblob_builder.h64
-rw-r--r--library/cpp/on_disk/multi_blob/ya.make13
-rw-r--r--library/cpp/on_disk/st_hash/fake.cpp4
-rw-r--r--library/cpp/on_disk/st_hash/save_stl.h84
-rw-r--r--library/cpp/on_disk/st_hash/static_hash.h420
-rw-r--r--library/cpp/on_disk/st_hash/static_hash_map.h59
-rw-r--r--library/cpp/on_disk/st_hash/sthash_iterators.h334
-rw-r--r--library/cpp/on_disk/st_hash/ya.make15
-rw-r--r--library/cpp/pybind/attr.h412
-rw-r--r--library/cpp/pybind/cast.cpp324
-rw-r--r--library/cpp/pybind/cast.h373
-rw-r--r--library/cpp/pybind/embedding.cpp63
-rw-r--r--library/cpp/pybind/embedding.h10
-rw-r--r--library/cpp/pybind/empty.cpp2
-rw-r--r--library/cpp/pybind/exceptions.cpp147
-rw-r--r--library/cpp/pybind/exceptions.h143
-rw-r--r--library/cpp/pybind/init.h25
-rw-r--r--library/cpp/pybind/method.h439
-rw-r--r--library/cpp/pybind/module.cpp72
-rw-r--r--library/cpp/pybind/module.h176
-rw-r--r--library/cpp/pybind/pod.cpp18
-rw-r--r--library/cpp/pybind/pod.h53
-rw-r--r--library/cpp/pybind/ptr.h51
-rw-r--r--library/cpp/pybind/typeattrs.h368
-rw-r--r--library/cpp/pybind/typedesc.cpp79
-rw-r--r--library/cpp/pybind/typedesc.h545
-rw-r--r--library/cpp/pybind/v2.cpp43
-rw-r--r--library/cpp/pybind/v2.h514
-rw-r--r--library/cpp/pybind/ya.make14
-rw-r--r--library/cpp/remmap/remmap.cpp138
-rw-r--r--library/cpp/remmap/remmap.h64
-rw-r--r--library/cpp/remmap/ya.make7
-rw-r--r--library/cpp/sqlite3/sqlite.cpp288
-rw-r--r--library/cpp/sqlite3/sqlite.h136
-rw-r--r--library/cpp/sqlite3/ya.make13
-rw-r--r--library/cpp/streams/growing_file_input/growing_file_input.cpp40
-rw-r--r--library/cpp/streams/growing_file_input/growing_file_input.h23
-rw-r--r--library/cpp/streams/growing_file_input/ya.make11
-rw-r--r--library/cpp/string_utils/subst_buf/substbuf.cpp1
-rw-r--r--library/cpp/string_utils/subst_buf/substbuf.h63
-rw-r--r--library/cpp/string_utils/subst_buf/ya.make7
-rw-r--r--library/cpp/ucompress/README.md1
-rw-r--r--library/cpp/ucompress/common.h8
-rw-r--r--library/cpp/ucompress/reader.cpp58
-rw-r--r--library/cpp/ucompress/reader.h25
-rw-r--r--library/cpp/ucompress/writer.cpp95
-rw-r--r--library/cpp/ucompress/writer.h31
-rw-r--r--library/cpp/ucompress/ya.make18
-rw-r--r--library/cpp/zipatch/reader.cpp173
-rw-r--r--library/cpp/zipatch/reader.h48
-rw-r--r--library/cpp/zipatch/writer.cpp232
-rw-r--r--library/cpp/zipatch/writer.h51
-rw-r--r--library/cpp/zipatch/ya.make16
98 files changed, 13417 insertions, 0 deletions
diff --git a/library/cpp/deprecated/autoarray/README.md b/library/cpp/deprecated/autoarray/README.md
new file mode 100644
index 0000000000..1d83147cee
--- /dev/null
+++ b/library/cpp/deprecated/autoarray/README.md
@@ -0,0 +1,3 @@
+Pre-C++11 vector-like container.
+
+Just use std::vector. If you need to fill your vector with custom-constructed data, use reserve+emplace_back (but make sure that your elements are movable).
diff --git a/library/cpp/deprecated/autoarray/autoarray.cpp b/library/cpp/deprecated/autoarray/autoarray.cpp
new file mode 100644
index 0000000000..15167f27f6
--- /dev/null
+++ b/library/cpp/deprecated/autoarray/autoarray.cpp
@@ -0,0 +1 @@
+#include "autoarray.h"
diff --git a/library/cpp/deprecated/autoarray/autoarray.h b/library/cpp/deprecated/autoarray/autoarray.h
new file mode 100644
index 0000000000..2aa12c5916
--- /dev/null
+++ b/library/cpp/deprecated/autoarray/autoarray.h
@@ -0,0 +1,264 @@
+#pragma once
+
+#include <util/system/compat.h>
+#include <util/system/yassert.h>
+#include <util/system/defaults.h>
+#include <util/system/sys_alloc.h>
+
+#include <util/generic/typetraits.h>
+#include <utility>
+
+#include <new>
+#include <util/generic/noncopyable.h>
+
+// Constructor tag for autoarray: element i is constructed as T(i).
+struct autoarray_getindex {
+ autoarray_getindex() = default;
+};
+
+// Constructor tag for autoarray: the buffer is zero-filled with memset and no constructors run.
+struct aarr_b0 {
+ aarr_b0() = default;
+};
+
+// Constructor tag for autoarray: the buffer is allocated but no element is constructed.
+struct aarr_nofill {
+ aarr_nofill() = default;
+};
+
+// Trait read by ~autoarray(): when empty_destructor is non-zero the per-element
+// destructor calls are skipped. The primary template takes it from TTypeTraits<T>::IsPod.
+template <typename T>
+struct ynd_type_traits {
+ enum {
+ empty_destructor = TTypeTraits<T>::IsPod,
+ };
+};
+
+/// Fixed-size, non-copyable array whose raw storage comes from y_allocate().
+/// Each constructor allocates the buffer and then builds the elements in place;
+/// if an element constructor throws, the already-built elements are destroyed,
+/// the buffer is released and the exception is rethrown.
+template <class T>
+class autoarray : TNonCopyable {
+protected:
+    T* arr;
+    size_t _size;
+
+private:
+    /// Allocates raw storage for siz elements without constructing them.
+    /// siz == 0 leaves arr == nullptr and _size == 0.
+    void AllocBuf(size_t siz) {
+        arr = nullptr;
+        _size = 0;
+        if (siz) {
+            arr = (T*)y_allocate(sizeof(T) * siz);
+            _size = siz;
+        }
+    }
+
+    /// Constructor rollback: destroys [arr, last) from the back, then frees the buffer.
+    /// The loop never forms a pointer before arr (doing so is undefined behaviour).
+    void DestroyBackwardAndFree(T* last) {
+        while (last != arr) {
+            --last;
+            last->~T();
+        }
+        y_deallocate(arr);
+    }
+
+    /// Constructor rollback: destroys arr[0..count) front to back, then frees the buffer.
+    void DestroyForwardAndFree(size_t count) {
+        for (size_t n = 0; n < count; ++n)
+            arr[n].~T();
+        y_deallocate(arr);
+    }
+
+public:
+    using value_type = T;
+    using iterator = T*;
+    using const_iterator = const T*;
+
+    /// Empty array: no allocation.
+    autoarray()
+        : arr(nullptr)
+        , _size(0)
+    {
+    }
+    /// siz default-constructed elements.
+    autoarray(size_t siz) {
+        AllocBuf(siz);
+        T* curr = arr;
+        try {
+            for (T* end = arr + _size; curr != end; ++curr)
+                new (curr) T();
+        } catch (...) {
+            DestroyBackwardAndFree(curr);
+            throw;
+        }
+    }
+    /// siz elements, each constructed as T(fill).
+    template <class A>
+    explicit autoarray(size_t siz, A& fill) {
+        AllocBuf(siz);
+        T* curr = arr;
+        try {
+            for (T* end = arr + _size; curr != end; ++curr)
+                new (curr) T(fill);
+        } catch (...) {
+            DestroyBackwardAndFree(curr);
+            throw;
+        }
+    }
+    /// siz elements, element i constructed as T(i).
+    explicit autoarray(size_t siz, autoarray_getindex) {
+        AllocBuf(siz);
+        size_t nCurrent = 0;
+        try {
+            for (nCurrent = 0; nCurrent < _size; ++nCurrent)
+                new (&arr[nCurrent]) T(nCurrent);
+        } catch (...) {
+            DestroyForwardAndFree(nCurrent);
+            throw;
+        }
+    }
+    /// siz elements whose storage is zero-filled; no constructors run.
+    explicit autoarray(size_t siz, aarr_b0) {
+        AllocBuf(siz);
+        if (_size) // arr is nullptr for siz == 0, and memset on a null pointer is undefined
+            memset(arr, 0, _size * sizeof(T));
+    }
+    /// siz elements of raw storage; nothing is constructed or initialized.
+    explicit autoarray(size_t siz, aarr_nofill) {
+        AllocBuf(siz);
+    }
+    /// siz elements, element i constructed as T(fill[i]).
+    template <class A>
+    explicit autoarray(const A* fill, size_t siz) {
+        AllocBuf(siz);
+        size_t nCurrent = 0;
+        try {
+            for (nCurrent = 0; nCurrent < _size; ++nCurrent)
+                new (&arr[nCurrent]) T(fill[nCurrent]);
+        } catch (...) {
+            DestroyForwardAndFree(nCurrent);
+            throw;
+        }
+    }
+    /// siz elements, element i constructed as T(fill[i], cfill).
+    template <class A, class B>
+    explicit autoarray(const A* fill, const B* cfill, size_t siz) {
+        AllocBuf(siz);
+        size_t nCurrent = 0;
+        try {
+            for (nCurrent = 0; nCurrent < _size; ++nCurrent)
+                new (&arr[nCurrent]) T(fill[nCurrent], cfill);
+        } catch (...) {
+            DestroyForwardAndFree(nCurrent);
+            throw;
+        }
+    }
+    /// fullsiz elements: the first min(initsiz, fullsiz) are T(fill[i]),
+    /// the remainder are default-constructed.
+    template <class A>
+    explicit autoarray(const A* fill, size_t initsiz, size_t fullsiz) {
+        AllocBuf(fullsiz);
+        size_t nCurrent = 0;
+        try {
+            for (nCurrent = 0; nCurrent < ((initsiz < _size) ? initsiz : _size); ++nCurrent)
+                new (&arr[nCurrent]) T(fill[nCurrent]);
+            for (; nCurrent < _size; ++nCurrent)
+                new (&arr[nCurrent]) T();
+        } catch (...) {
+            DestroyForwardAndFree(nCurrent);
+            throw;
+        }
+    }
+    /// fullsiz elements: the first min(initsiz, fullsiz) are T(fill[i]),
+    /// the remainder are copies of dummy.
+    template <class A>
+    explicit autoarray(const A* fill, size_t initsiz, size_t fullsiz, const T& dummy) {
+        AllocBuf(fullsiz);
+        size_t nCurrent = 0;
+        try {
+            for (nCurrent = 0; nCurrent < ((initsiz < _size) ? initsiz : _size); ++nCurrent)
+                new (&arr[nCurrent]) T(fill[nCurrent]);
+            for (; nCurrent < _size; ++nCurrent)
+                new (&arr[nCurrent]) T(dummy);
+        } catch (...) {
+            DestroyForwardAndFree(nCurrent);
+            throw;
+        }
+    }
+
+    /// siz elements, each constructed from the forwarded argument pack.
+    template <class... R>
+    explicit autoarray(size_t siz, R&&... fill) {
+        AllocBuf(siz);
+        T* curr = arr;
+        try {
+            for (T* end = arr + _size; curr != end; ++curr)
+                new (curr) T(std::forward<R>(fill)...);
+        } catch (...) {
+            DestroyBackwardAndFree(curr);
+            throw;
+        }
+    }
+    /// Destroys the elements (unless ynd_type_traits<T>::empty_destructor is set)
+    /// and releases the buffer.
+    ~autoarray() {
+        if (_size) {
+            if (!ynd_type_traits<T>::empty_destructor)
+                for (T *curr = arr, *end = arr + _size; curr != end; ++curr)
+                    curr->~T();
+            y_deallocate(arr);
+        }
+    }
+    /// Element access; the index is checked only by Y_ASSERT.
+    T& operator[](size_t pos) {
+        Y_ASSERT(pos < _size);
+        return arr[pos];
+    }
+    const T& operator[](size_t pos) const {
+        Y_ASSERT(pos < _size);
+        return arr[pos];
+    }
+    size_t size() const {
+        return _size;
+    }
+    /// Exchanges buffer pointer and size with another array.
+    void swap(autoarray& with) {
+        T* tmp_arr = arr;
+        size_t tmp_size = _size;
+        arr = with.arr;
+        _size = with._size;
+        with.arr = tmp_arr;
+        with._size = tmp_size;
+    }
+    /// Rebuilds the array with siz elements: existing elements are copied into a
+    /// new buffer, extra slots are default-constructed, then the buffers are swapped.
+    void resize(size_t siz) {
+        autoarray<T> tmp(arr, _size, siz);
+        swap(tmp);
+    }
+    /// Same as resize(siz), but extra slots are copies of dummy.
+    void resize(size_t siz, const T& dummy) {
+        autoarray<T> tmp(arr, _size, siz, dummy);
+        swap(tmp);
+    }
+    T* rawpointer() {
+        return arr;
+    }
+    /// Legacy shorthand for the const data pointer.
+    const T* operator~() const {
+        return arr;
+    }
+    T* begin() {
+        return arr;
+    }
+    T* end() {
+        return arr + _size;
+    }
+    T& back() {
+        Y_ASSERT(_size);
+        return arr[_size - 1];
+    }
+    bool empty() const {
+        return !_size;
+    }
+    /// Legacy shorthand for empty().
+    bool operator!() const {
+        return !_size;
+    }
+    /// Legacy shorthand for size().
+    size_t operator+() const {
+        return _size;
+    }
+    const T* begin() const {
+        return arr;
+    }
+    const T* end() const {
+        return arr + _size;
+    }
+    const T& back() const {
+        Y_ASSERT(_size);
+        return arr[_size - 1];
+    }
+    //operator T*() { return arr; }
+};
+
+/// Two autoarrays are equal when they have the same size and no pair of
+/// corresponding elements compares unequal with operator!=.
+template <class T>
+inline bool operator==(const autoarray<T>& a, const autoarray<T>& b) {
+    const size_t total = a.size();
+    if (total != b.size())
+        return false;
+    size_t idx = 0;
+    for (; idx < total; ++idx) {
+        if (a[idx] != b[idx])
+            break; // first mismatch ends the scan
+    }
+    return idx == total;
+}
diff --git a/library/cpp/deprecated/autoarray/ya.make b/library/cpp/deprecated/autoarray/ya.make
new file mode 100644
index 0000000000..4b055f8c29
--- /dev/null
+++ b/library/cpp/deprecated/autoarray/ya.make
@@ -0,0 +1,7 @@
+LIBRARY()
+
+SRCS(
+ autoarray.cpp
+)
+
+END()
diff --git a/library/cpp/deprecated/fgood/README.md b/library/cpp/deprecated/fgood/README.md
new file mode 100644
index 0000000000..4f66289657
--- /dev/null
+++ b/library/cpp/deprecated/fgood/README.md
@@ -0,0 +1,15 @@
+Some ancient wrappers on top of FILE*, and some string manipulation functions.
+
+Alternatives are as follows.
+
+For TFILEPtr. Use TIFStream or TOFStream if you need IO. For some rare use cases a TFileMap might also do.
+
+For fput/fget/getline. Use streams API.
+
+For struct ffb and struct prnstr. Just don't use them. Even if you can figure out what they do.
+
+For sf family of functions and TLineSplitter. Just use Split* from util/string/split.h
+
+For TSFReader. Use TMapTsvFile.
+
+For read_or_die family of functions. Use streams API.
diff --git a/library/cpp/deprecated/fgood/ffb.cpp b/library/cpp/deprecated/fgood/ffb.cpp
new file mode 100644
index 0000000000..aa9da861a6
--- /dev/null
+++ b/library/cpp/deprecated/fgood/ffb.cpp
@@ -0,0 +1,407 @@
+#include "ffb.h"
+
+#include <util/string/util.h> // str_spn
+#include <util/system/compat.h>
+#include <util/generic/yexception.h>
+
+#include <cstdio>
+#include <algorithm>
+
+#include <ctype.h>
+
+#ifdef _win_
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+// Wraps an already opened FILE*. For a non-null stream that is not a terminal,
+// a 512 KiB fully-buffered stdio buffer is requested when BUFSIZ is smaller than that.
+ffb::ffb(FILE* file)
+    : TFILEPtr(file)
+{
+    const bool enlarge = file && !isatty(fileno(file)) && BUFSIZ < 512 * 1024;
+    if (enlarge)
+        setvbuf(file, nullptr, _IOFBF, 512 * 1024);
+}
+
+// Assigns a new FILE* through TFILEPtr::operator= and then applies the same
+// 512 KiB buffering policy as the FILE* constructor.
+void ffb::operator=(FILE* f) {
+    TFILEPtr::operator=(f);
+    if (!f)
+        return;
+    if (isatty(fileno(f)))
+        return;
+    if (BUFSIZ < 512 * 1024)
+        setvbuf(f, nullptr, _IOFBF, 512 * 1024);
+}
+
+// Opens the file through TFILEPtr::open and, unless the stream is a terminal,
+// requests a 512 KiB fully-buffered stdio buffer when BUFSIZ is smaller than that.
+void ffb::open(const char* name, const char* mode) {
+    TFILEPtr::open(name, mode);
+    if (isatty(fileno(*this)) || BUFSIZ >= 512 * 1024)
+        return;
+    setvbuf(*this, nullptr, _IOFBF, 512 * 1024);
+}
+
+// Splits a tab-delimited line in place. Each '\t' is overwritten with NUL and the
+// start of every field is stored in fb, followed by a nullptr entry.
+// Scanning stops at NUL, at '\n' (10) or once 31 fields have been recorded, so fb
+// must have room for 32 pointers. A '\r' (13) directly before the '\n' is cleared.
+// Returns the number of fields; an empty line yields 0 and fb[0] == nullptr.
+// NOTE(review): when the 31-field limit stops the loop, the unconditional `*buf = 0`
+// lands on the first character of the last recorded field - confirm this is intended.
+int sf(char** fb, char* buf) { //don't want to call sf(fb, buf, 32)
+ if (!(*buf && *buf != 10)) {
+ *fb = nullptr;
+ return 0;
+ }
+ int n = 1;
+ fb[0] = buf;
+ while (*buf && *buf != 10 && n < 31) {
+ if (*buf == '\t') {
+ *buf++ = 0;
+ fb[n++] = buf;
+ continue;
+ }
+ buf++;
+ }
+ // buf is past the first character here whenever *buf == 10, so buf[-1] is in range
+ if (*buf == 10 && buf[-1] == 13)
+ buf[-1] = 0;
+ *buf = 0;
+ fb[n] = nullptr;
+ return n;
+}
+
+// Same as the two-argument sf(), but the field limit is fb_sz - 1 instead of 31:
+// splits buf in place on '\t', stores field starts in fb and terminates the list
+// with nullptr. Returns the number of fields (0 for an empty line).
+// NOTE(review): fb_sz == 0 wraps around on `fb_sz--`; callers presumably never pass 0.
+int sf(char** fb, char* buf, size_t fb_sz) {
+ if (!(*buf && *buf != 10)) {
+ *fb = nullptr;
+ return 0;
+ }
+ fb_sz--;
+ int n = 1;
+ fb[0] = buf;
+ while (*buf && *buf != 10 && n < (int)fb_sz) {
+ if (*buf == '\t') {
+ *buf++ = 0;
+ fb[n++] = buf;
+ continue;
+ }
+ buf++;
+ }
+ // strip the '\r' of a "\r\n" line ending
+ if (*buf == 10 && buf[-1] == 13)
+ buf[-1] = 0;
+ *buf = 0;
+ fb[n] = nullptr;
+ return n;
+}
+
+// Whitespace splitter used by sf(' ', ...): leading isspace() characters are skipped,
+// every run of whitespace acts as one separator, and a field start is recorded only
+// when something follows the run (so trailing whitespace adds no empty field).
+// Splits in place, stores at most fb_sz - 1 field pointers in fb plus a nullptr
+// terminator, and returns the number of fields.
+inline int sf_blank(char** fb, char* buf, size_t fb_sz) {
+ while (isspace((ui8)*buf))
+ buf++;
+ if (!*buf) {
+ *fb = nullptr;
+ return 0;
+ }
+ fb_sz--;
+ int n = 1;
+ fb[0] = buf;
+ while (*buf && *buf != 10 && n < (int)fb_sz) {
+ if (isspace((ui8)*buf)) {
+ *buf++ = 0;
+ while (isspace((ui8)*buf))
+ buf++;
+ if (*buf)
+ fb[n++] = buf;
+ continue;
+ }
+ buf++;
+ }
+ if (*buf == 10 && buf[-1] == 13)
+ buf[-1] = 0;
+ *buf = 0;
+ fb[n] = nullptr;
+ return n;
+}
+
+// Splits buf in place on the single character fs. fs == ' ' is delegated to
+// sf_blank(). Otherwise leading separators are skipped and a run of consecutive
+// separators counts as one. At most fb_sz - 1 field pointers are stored in fb,
+// followed by nullptr; scanning stops at NUL or '\n'. Returns the number of fields.
+// Note that a field start is recorded after every separator run, so a trailing
+// separator produces an empty last field.
+int sf(char fs, char** fb, char* buf, size_t fb_sz) {
+ if (fs == ' ')
+ return sf_blank(fb, buf, fb_sz);
+ while (*buf == fs)
+ buf++;
+ if (!(*buf && *buf != 10)) {
+ *fb = nullptr;
+ return 0;
+ }
+ fb_sz--;
+ int n = 1;
+ fb[0] = buf;
+ while (*buf && *buf != 10 && n < (int)fb_sz) {
+ if (*buf == fs) {
+ *buf++ = 0;
+ while (*buf == fs)
+ buf++;
+ fb[n++] = buf;
+ continue;
+ }
+ buf++;
+ }
+ if (*buf == 10 && buf[-1] == 13)
+ buf[-1] = 0;
+ *buf = 0;
+ fb[n] = nullptr;
+ return n;
+}
+
+// Splits buf in place on the exact separator string fs (not a character set).
+// The first byte of each separator occurrence is overwritten with NUL and the next
+// field starts right after the separator. At most fb_sz - 1 field pointers are
+// stored in fb, followed by nullptr; scanning stops at NUL or '\n'.
+// Returns the number of fields (0 for an empty line).
+int sf(const char* fs, char** fb, char* buf, size_t fb_sz) {
+ if (!(*buf && *buf != 10)) {
+ *fb = nullptr;
+ return 0;
+ }
+ int fs_len = strlen(fs);
+ fb_sz--;
+ int n = 1;
+ fb[0] = buf;
+ while (*buf && *buf != 10 && n < (int)fb_sz) {
+ // cheap first-character test before comparing the rest of the separator
+ if (*buf == *fs && !strncmp(buf + 1, fs + 1, fs_len - 1)) {
+ *buf = 0;
+ buf += fs_len;
+ fb[n++] = buf;
+ continue;
+ }
+ buf++;
+ }
+ if (*buf == 10 && buf[-1] == 13)
+ buf[-1] = 0;
+ *buf = 0;
+ fb[n] = nullptr;
+ return n;
+}
+
+// True for a null pointer or a pointer to the terminating NUL.
+inline bool is_end(const char* p) {
+    return p == nullptr || *p == '\0';
+}
+
+// Splits buf in place using any character of seps as a delimiter (note the
+// different argument order: buf comes before fb). Leading delimiters are skipped
+// and runs of delimiters count as one. Up to fb_sz field pointers are stored in fb,
+// followed by nullptr, so fb needs fb_sz + 1 slots. Returns the number of fields.
+// NOTE(review): brk()/cbrk() are str_spn helpers not visible here; presumably brk
+// finds the next delimiter and cbrk the next non-delimiter - confirm.
+int sf(const char* seps, char* buf, char** fb, size_t fb_sz) {
+ if (fb_sz < 1 || is_end(buf)) {
+ *fb = nullptr;
+ return 0;
+ }
+ str_spn sseps(seps);
+ fb[0] = nullptr;
+ int n = 0;
+ // skip leading delimiters
+ buf = sseps.cbrk(buf);
+ if (is_end(buf))
+ return 0;
+ // store fields
+ while (n < (int)fb_sz) {
+ fb[n++] = buf;
+ // find delimiters
+ buf = sseps.brk(buf + 1);
+ if (is_end(buf))
+ break;
+ *buf = 0;
+ // skip delimiters
+ buf = sseps.cbrk(buf + 1);
+ if (is_end(buf))
+ break;
+ }
+ fb[n] = nullptr;
+ return n;
+}
+
+// Splits the NUL-terminated string p in place and appends the start of each field
+// to `fields`. Separators are overwritten with NUL. Empty fields are dropped when
+// SkipEmpty() is set. A null or empty p appends nothing.
+// q holds the start of the field being scanned, or nullptr while no field is open.
+void TLineSplitter::operator()(char* p, TVector<char*>& fields) const {
+ if (!p || !*p)
+ return;
+ char* q = p;
+ while (1) {
+ p = Sep.brk(p);
+ if (q && (p - q || !SkipEmpty()))
+ fields.push_back(q);
+ q = nullptr;
+ if (!*p)
+ break;
+ // single-char separators always match; multi-char ones need the rest of SepStr to match too
+ if (SepStrLen == 1 || (SepStrLen > 1 && !strncmp(p + 1, SepStr + 1, SepStrLen - 1))) {
+ *p = 0;
+ p += SepStrLen;
+ q = p;
+ } else
+ p++;
+ }
+}
+
+// Non-modifying variant: scans the NUL-terminated string p and appends a
+// (pointer, length) pair per field to `fields`; the input is left untouched.
+// Empty fields are dropped when SkipEmpty() is set. A null or empty p appends nothing.
+void TLineSplitter::operator()(const char* p, TVector<std::pair<const char*, size_t>>& fields) const {
+ if (!p || !*p)
+ return;
+ const char* q = p;
+ while (1) {
+ p = Sep.brk(p);
+ if (q && (p - q || !SkipEmpty()))
+ fields.push_back(std::make_pair(q, p - q));
+ q = nullptr;
+ if (!*p)
+ break;
+ // single-char separators always match; multi-char ones need the rest of SepStr to match too
+ if (SepStrLen == 1 || (SepStrLen > 1 && !strncmp(p + 1, SepStr + 1, SepStrLen - 1))) {
+ p += SepStrLen;
+ q = p;
+ } else
+ p++;
+ }
+}
+
+// Single-character separator. For sep == ' ' the splitter uses the whitespace set
+// "\t\n\v\f\r " and skips empty fields; otherwise it splits on sep and keeps them.
+// The file is opened immediately through Open().
+TSFReader::TSFReader(const char* fname, char sep, i32 nfrq) // if sep == ' ' isspace will be imitated (for compat)
+ : Split(str_spn(sep == ' ' ? "\t\n\v\f\r " : TString(1, sep).data()), sep == ' ')
+ , OpenPipe(false)
+{
+ Open(fname, nfrq);
+}
+
+// Exact-string separator; empty fields are kept. The file is opened immediately through Open().
+TSFReader::TSFReader(const char* fname, const char* sep, i32 nfrq)
+ : Split(sep, false)
+ , OpenPipe(false)
+{
+ Open(fname, nfrq);
+}
+
+// Caller-supplied splitter (copied). The file is opened immediately through Open().
+TSFReader::TSFReader(const char* fname, const TLineSplitter& spl, i32 nfrq)
+ : Split(spl)
+ , OpenPipe(false)
+{
+ Open(fname, nfrq);
+}
+
+// (Re)opens the reader on fname and resets the line and field counters.
+// nfrq is the required field count per line (-1 disables the check in NextLine).
+// A null fname just closes any open file. "/dev/stdin" attaches to stdin.
+// When OpenPipe is set (by Popen), fname is run as a command through popen.
+// For non-terminal streams a stdio buffer of vbuf_size bytes is requested.
+void TSFReader::Open(const char* fname, i32 nfrq, size_t vbuf_size) {
+ FieldsRequired = nfrq;
+ NF = NR = 0;
+
+ if (IsOpen())
+ File.close();
+
+ if (!fname)
+ return;
+
+ if (!strcmp(fname, "/dev/stdin")) {
+ File.assign(stdin, "/dev/stdin");
+ } else {
+ if (OpenPipe)
+ File.popen(fname, "r");
+ else
+ File.open(fname, "r");
+ }
+ // the pipe flag is one-shot: it applies to this call only
+ OpenPipe = false;
+ if (!isatty(fileno(File)))
+ setvbuf(File, nullptr, _IOFBF, vbuf_size);
+}
+
+// Like Open(), but pname is opened with popen: sets the one-shot OpenPipe flag
+// and delegates to Open().
+void TSFReader::Popen(const char* pname, i32 nfrq, size_t vbuf_size) {
+ OpenPipe = true;
+ Open(pname, nfrq, vbuf_size);
+}
+
+// Reads the next line and splits it into Fields (NF is updated, NR is incremented).
+// Returns false when no more input can be read. A trailing '\r' is stripped.
+// If pool is given, the line is copied into it and the fields point into that copy;
+// otherwise they point into the internal buffer and are valid until the next call.
+// Throws yexception when FieldsRequired != -1 and the field count differs from it.
+bool TSFReader::NextLine(segmented_string_pool* pool) {
+ size_t line_len = 0;
+
+#ifdef __FreeBSD__
+ // fgetln returns a pointer into the stdio buffer, which is not NUL-terminated
+ char* ptr = fgetln(File, &line_len);
+ if (!ptr)
+ return false;
+ if (!line_len || ptr[line_len - 1] != '\n') { // last line w/o newline
+ Buf.AssignNoAlias(ptr, line_len);
+ ptr = Buf.begin();
+ } else {
+ // can safely replace newline with \0
+ ptr[line_len - 1] = 0;
+ --line_len;
+ }
+#else
+ if (!getline(File, Buf))
+ return false;
+ char* ptr = Buf.begin();
+ line_len = Buf.size();
+#endif
+ if (line_len && ptr[line_len - 1] == '\r')
+ ptr[line_len - 1] = 0;
+
+ if (pool) {
+ char* nptr = pool->append(ptr);
+ Y_ASSERT(!strcmp(ptr, nptr));
+ ptr = nptr;
+ }
+
+ ++NR;
+ Fields.clear();
+ Split(ptr, Fields);
+ NF = Fields.size();
+
+ if (FieldsRequired != -1 && FieldsRequired != (int)NF)
+ ythrow yexception() << File.name() << " line " << NR << ": " << NF << " fields, expected " << FieldsRequired;
+
+ return true;
+}
+
+// printf-style append at the current position; the buffer grows by doubling
+// until the formatted text and its terminating NUL fit.
+// Returns the number of characters appended, or the negative vsnprintf result on a
+// formatting error, in which case the stored text and pos are left unchanged.
+// The new capacity is committed only after the allocation succeeds, so a throwing
+// `new` cannot leave asize larger than the real buffer.
+int prnstr::f(const char* c, ...) {
+    va_list params;
+    int n = asize - pos, k;
+    va_start(params, c);
+    while ((k = vsnprintf(buf + pos, n, c, params)) >= n) {
+        // a consumed va_list must be ended before it is restarted for the retry
+        va_end(params);
+        int grown = asize;
+        do {
+            n += grown;
+            grown *= 2;
+        } while (k + pos >= n);
+        char* t = new char[grown];
+        memcpy(t, buf, pos);
+        delete[] buf;
+        buf = t;
+        asize = grown;
+        va_start(params, c);
+    }
+    va_end(params);
+    if (k < 0) {
+        // vsnprintf failed: restore the terminator instead of moving pos backwards
+        if (pos < asize)
+            buf[pos] = 0;
+        return k;
+    }
+    pos += k;
+    return k;
+}
+// Appends exactly k bytes from c and NUL-terminates the result, growing the buffer
+// by doubling when needed. A null c appends nothing. Returns k (0 for null c).
+// The new capacity is committed only after the allocation succeeds, so a throwing
+// `new` cannot leave asize larger than the real buffer.
+int prnstr::s(const char* c, size_t k) {
+    if (!c)
+        return 0;
+    size_t n = asize - pos;
+    if (k >= n) {
+        int grown = asize;
+        do {
+            n += grown;
+            grown *= 2;
+        } while (k + pos >= n);
+        char* t = new char[grown];
+        memcpy(t, buf, pos);
+        delete[] buf;
+        buf = t;
+        asize = grown;
+    }
+    memcpy(buf + pos, c, k);
+    pos += k;
+    buf[pos] = 0;
+    return k;
+}
+// Resets the string to empty and shrinks an oversized buffer back to 32768 bytes.
+// The replacement buffer is allocated before the old one is released, so a throwing
+// `new` leaves the object with a valid buffer instead of a dangling pointer.
+// The buffer is NUL-terminated afterwards, matching restart().
+void prnstr::clear() {
+    pos = 0;
+    if (asize > 32768) {
+        char* fresh = new char[32768];
+        delete[] buf;
+        buf = fresh;
+        asize = 32768;
+    }
+    if (asize > 0)
+        *buf = 0;
+}
+
+// Exchanges buffer pointer, position and capacity with w; no allocation or copying of text.
+void prnstr::swap(prnstr& w) {
+ std::swap(buf, w.buf);
+ std::swap(pos, w.pos);
+ std::swap(asize, w.asize);
+}
+
+// Opens fname for binary reading ("rb"). On failure calls err(1, ...) with the file
+// name (presumably the BSD-style err() that prints a message and exits - confirm).
+FILE* read_or_die(const char* fname) {
+ FILE* f = fopen(fname, "rb");
+ if (!f)
+ err(1, "%s", fname);
+ return f;
+}
+// Opens fname for binary writing ("wb"). On failure calls err(1, ...) with the file name.
+FILE* write_or_die(const char* fname) {
+ FILE* f = fopen(fname, "wb");
+ if (!f)
+ err(1, "%s", fname);
+ return f;
+}
+// Opens fname with the given fopen mode. On failure calls err(1, ...) reporting
+// both the file name and the mode.
+FILE* fopen_or_die(const char* fname, const char* mode) {
+ FILE* f = fopen(fname, mode);
+ if (!f)
+ err(1, "%s (mode '%s')", fname, mode);
+ return f;
+}
+
+// Opens fname with the given fopen mode. Throws yexception carrying the file name,
+// the mode and LastSystemErrorText() when fopen fails.
+FILE* fopen_chk(const char* fname, const char* mode) {
+ FILE* f = fopen(fname, mode);
+ if (!f)
+ ythrow yexception() << fname << " (mode '" << mode << "'): " << LastSystemErrorText();
+ return f;
+}
+
+// Closes f and throws yexception when fclose reports an error.
+// fname is used only in the exception message.
+void fclose_chk(FILE* f, const char* fname) {
+ if (fclose(f))
+ ythrow yexception() << "file " << fname << ": " << LastSystemErrorText();
+}
diff --git a/library/cpp/deprecated/fgood/ffb.h b/library/cpp/deprecated/fgood/ffb.h
new file mode 100644
index 0000000000..ca229eb65a
--- /dev/null
+++ b/library/cpp/deprecated/fgood/ffb.h
@@ -0,0 +1,264 @@
+#pragma once
+
+#include "fgood.h"
+
+#include <util/string/util.h> // str_spn
+#include <util/string/split.h> // str_spn
+#include <util/memory/segmented_string_pool.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/generic/noncopyable.h>
+
+#include <utility>
+
+#include <cstdarg>
+#include <cstring>
+
+// TFILEPtr with short-named output helpers. The out-of-line constructor,
+// operator= and open() additionally enlarge the stdio buffer for non-terminal streams.
+struct ffb: public TFILEPtr {
+    ffb() {
+    }
+    ffb(FILE* file);
+    ffb(const char* name, const char* mode) {
+        open(name, mode);
+    }
+    void operator=(FILE* f); // take ownership
+    void open(const char* name, const char* mode);
+    // printf-style formatted output; returns the vfprintf result.
+    int f(const char* c, ...) {
+        va_list args;
+        va_start(args, c);
+        const int written = vfprintf(*this, c, args);
+        va_end(args); // every va_start must be paired with va_end before returning
+        return written;
+    }
+    // Writes a NUL-terminated string (without the terminator).
+    void s(const char* c) {
+        fsput(c, strlen(c));
+    }
+    // Writes n raw bytes.
+    void b(const void* cc, int n) {
+        fsput((const char*)cc, n);
+    }
+    // Writes N raw bytes (same as b()).
+    void B(const void* cc, int N) {
+        fsput((const char*)cc, N);
+    }
+    // Writes a single character.
+    void c(char c) {
+        fputc(c);
+    }
+    void cbe(wchar16 c) { // big endian utf-16
+        fputc(char(c >> 8)); //Hi8
+        fputc(char(c & 255)); //Lo8
+    }
+    // Writes a zero-terminated UTF-16 string in big-endian byte order (terminator excluded).
+    void sbe(const wchar16* c) {
+        for (; *c; c++)
+            cbe(*c);
+    }
+    void fclose() {
+        close();
+    }
+};
+
+// split fields of tab-delimited line of text
+// here and below fb actual size must be fb_sz + 1 to allow fb[fb_sz] be zero
+int sf(char** fb, char* buf, size_t fb_sz);
+int sf(char** fb, char* buf /* fb_sz == 32 */);
+
+// split fields of char-delimited line of text
+// Achtung: delim = ' ' imitates awk: initial separators are skipped,
+// repeated seps treated as one, all chars less than ' ' treated as separators.
+int sf(char fs, char** fb, char* buf, size_t fb_sz = 32);
+
+// split fields of string-delimited line of text (fs is NOT a regexp)
+// (usually fs is "@@")
+int sf(const char* fs, char** fb, char* buf, size_t fb_sz = 32);
+
+// split fields of char-delimited line of text, set of char-separators is given
+// Achtung: repeated seps treated as one, initial seps are skipped
+// newlines are NOT ignored.
+int sf(const char* seps, char* buf, char** fb, size_t fb_sz = 32);
+
+// Removes one trailing '\n' in place; on _win32_ builds a '\r' that preceded it
+// is removed as well. Returns buf.
+inline char* chomp(char* buf) {
+    size_t len = strlen(buf);
+    if (len == 0 || buf[len - 1] != '\n')
+        return buf;
+    buf[--len] = 0;
+#ifdef _win32_
+    if (len > 0 && buf[len - 1] == '\r')
+        buf[--len] = 0;
+#endif
+    return buf;
+}
+
+// Removes one trailing '\n' (if present) and then one trailing '\r' (if present),
+// in place and on every platform. Returns buf.
+inline char* chomp_cr(char* buf) {
+    size_t len = strlen(buf);
+    if (len > 0 && buf[len - 1] == '\n')
+        buf[--len] = 0;
+    if (len > 0 && buf[len - 1] == '\r')
+        buf[--len] = 0;
+    return buf;
+}
+
+// Line tokenizer with two modes: split by an exact separator string, or split by
+// any character of a separator set. Optionally drops empty fields.
+class TLineSplitter {
+protected:
+ enum { // Default: Split string by SepStr
+ SplitByAnySep = 1, // Split string by Sep
+ NoEmptyFields = 2 // Skip all empty fields between separators
+ };
+
+private:
+ ui32 Flags;
+ const str_spn Sep; // collection of separators
+ const char* SepStr; // pointer exact string to separate by
+ size_t SepStrLen; // length of separator string
+
+public:
+ // Exact-string mode. Sep holds only the first character of sep and is used to
+ // find candidate positions; the rest of sep is compared in operator().
+ // sep is stored by pointer, so it must outlive the splitter.
+ TLineSplitter(const char* sep, bool noEmpty)
+ : Flags(noEmpty ? NoEmptyFields : 0)
+ , Sep(TString(sep, 1).data())
+ , SepStr(sep)
+ , SepStrLen(strlen(sep))
+ {
+ }
+ // Character-set mode: any character matched by sep separates fields.
+ TLineSplitter(const str_spn& sep, bool noEmpty = false)
+ : Flags(SplitByAnySep | (noEmpty ? NoEmptyFields : 0))
+ , Sep(sep)
+ , SepStr(nullptr)
+ , SepStrLen(1)
+ {
+ }
+ bool AnySep() const {
+ return Flags & SplitByAnySep;
+ }
+ bool SkipEmpty() const {
+ return Flags & NoEmptyFields;
+ }
+ /// Separates string onto tokens
+ /// Expecting a zero-terminated string
+ /// By default returns empty fields between sequential separators
+ void operator()(char* p, TVector<char*>& fields) const;
+ /// Same, but for const string - fills vector of pairs (pointer, length)
+ void operator()(const char* p, TVector<std::pair<const char*, size_t>>& fields) const;
+};
+
+/**
+ * Use library/cpp/map_text_file/map_tsv_file.h instead.
+ */
+// Line-oriented reader for separator-delimited text: NextLine() reads one line and
+// splits it into fields that are then available through operator[] and GetFields().
+class TSFReader {
+ TString Buf; // buffer used for non-'\n'-terminated string and for non-freebsd work
+ TLineSplitter Split;
+ TVector<char*> Fields;
+ size_t NF; // Fields.size()
+ size_t NR; // number of lines read so far
+
+ TFILEPtr File;
+
+ bool OpenPipe; // internal flag that turns open() to popen()
+
+ i32 FieldsRequired; // if != -1 and != NF, NextLine() throws yexception
+
+public:
+ // char separator
+ // Achtung: delim = ' ' imitates awk: initial separators are skipped,
+ // all chars less than ' ' treated as separators.
+ TSFReader(const char* fname = nullptr, char sep = '\t', i32 nf_reqired = -1);
+ // exact string separator
+ TSFReader(const char* fname, const char* sep, i32 nf_reqired = -1);
+ // fully customizable
+ TSFReader(const char* fname, const TLineSplitter& spl, i32 nf_reqired = -1);
+
+ void Open(const char* fname, i32 nf_reqired = -1, size_t vbufsize = 1u << 21); // use "/dev/stdin" for stdin
+ void Popen(const char* pname, i32 nf_reqired = -1, size_t vbufsize = 1u << 21);
+
+ bool NextLine(segmented_string_pool* pool = nullptr);
+
+ bool IsOpen() const {
+ return (FILE*)File != nullptr;
+ }
+ bool IsEof() const {
+ return feof(File);
+ }
+ void Close() {
+ File.close();
+ }
+ void Rewind() {
+ File.seek(0, SEEK_SET);
+ }
+ void Seek(i64 offset, int mode = SEEK_SET) {
+ File.seek(offset, mode);
+ }
+ // NOTE(review): ftell returns long, which can be narrower than i64 on some
+ // platforms; confirm large files are not expected here.
+ i64 Tell() const {
+ return ftell(File);
+ }
+ // Unchecked mutable access to field ind of the current line.
+ char*& operator[](size_t ind) {
+ //if (ind >= NF)
+ // throw yexception("Can't return reference to unexisting field %" PRISZT, ind);
+ return Fields[ind];
+ }
+ // Checked read access: returns nullptr when ind is not below the field count.
+ const char* operator[](size_t ind) const {
+ if (ind >= NF)
+ return nullptr;
+ return Fields[ind];
+ }
+ operator int() const { // note: empty input line makes 0 fields
+ return (int)NF;
+ }
+ const char* Name() const {
+ return File.name().data();
+ }
+ // Number of lines read so far.
+ size_t Line() const {
+ return NR;
+ }
+ const TVector<char*>& GetFields() const {
+ return Fields;
+ }
+};
+
+// Growable character buffer with printf-style and raw append helpers.
+// buf is owned (new[] / delete[]), pos is the current length, asize the capacity.
+// NOTE(review): the struct owns buf through a raw pointer but keeps the implicit
+// copy constructor and assignment; copying an instance would free buf twice.
+// NOTE(review): the constructors do not write a terminator, so operator~ on a
+// fresh object reads uninitialized memory until something is appended or
+// restart() is called.
+struct prnstr {
+ char* buf;
+ int pos;
+ int asize;
+ // Starts with a 32-byte buffer.
+ prnstr()
+ : pos(0)
+ {
+ asize = 32;
+ buf = new char[asize];
+ }
+ // Starts with an asz-byte buffer.
+ explicit prnstr(int asz)
+ : pos(0)
+ {
+ asize = asz;
+ buf = new char[asize];
+ }
+ int f(const char* c, ...);
+ int s(const char* c1, const char* c2);
+ int s(const char* c1, const char* c2, const char* c3);
+ int s(const char* c, size_t len);
+ //int s(const char *c);
+ // Appends a NUL-terminated string; a null pointer appends nothing and returns 0.
+ int s(const char* c) {
+ return c ? s(c, strlen(c)) : 0;
+ }
+ int s(const TString& c);
+ int s_htmesc(const char* c, bool enc_utf = false);
+ int s_htmesc_w(const char* c);
+ int c(char c);
+ int cu(wchar32 c); //for utf-8
+ // Empties the string but keeps the current buffer.
+ void restart() {
+ *buf = 0;
+ pos = 0;
+ }
+ // Legacy shorthand for the data pointer.
+ const char* operator~() const {
+ return buf;
+ }
+ // Legacy shorthand for the current length.
+ int operator+() const {
+ return pos;
+ }
+ ~prnstr() {
+ delete[] buf;
+ }
+ void clear();
+ void swap(prnstr& w);
+};
+
+// functions that terminate program upon failure
+FILE* read_or_die(const char* fname);
+FILE* write_or_die(const char* fname);
+FILE* fopen_or_die(const char* fname, const char* mode);
+
+// functions that throw upon failure
+FILE* fopen_chk(const char* fname, const char* mode);
+void fclose_chk(FILE* f, const char* fname_dbg);
diff --git a/library/cpp/deprecated/fgood/fgood.cpp b/library/cpp/deprecated/fgood/fgood.cpp
new file mode 100644
index 0000000000..5d4725bfae
--- /dev/null
+++ b/library/cpp/deprecated/fgood/fgood.cpp
@@ -0,0 +1,70 @@
+#include "fgood.h"
+
+#include <util/generic/cast.h>
+#include <util/string/cast.h>
+#include <util/system/fstat.h>
+
+#ifdef _win32_
+#include <io.h>
+#endif
+
+// Returns the length of the underlying file as reported by GetFileLength() on its
+// OS handle. Throws yexception with the file name and LastSystemErrorText() when
+// that call returns a negative value.
+i64 TFILEPtr::length() const {
+#ifdef _win32_
+ FHANDLE fd = (FHANDLE)_get_osfhandle(fileno(m_file));
+#else
+ FHANDLE fd = fileno(m_file);
+#endif
+ i64 rv = GetFileLength(fd);
+ if (rv < 0)
+ ythrow yexception() << "TFILEPtr::length() " << Name.data() << ": " << LastSystemErrorText();
+ return rv;
+}
+
+// fopen wrapper that never returns nullptr: throws yexception naming the file,
+// the mode and LastSystemErrorText() when the open fails.
+FILE* OpenFILEOrFail(const TString& name, const char* mode) {
+ FILE* res = ::fopen(name.data(), mode);
+ if (!res) {
+ ythrow yexception() << "can't open \'" << name << "\' with mode \'" << mode << "\': " << LastSystemErrorText();
+ }
+ return res;
+}
+
+// Closes the stream with fclose; the fclose result is ignored.
+void TFILECloser::Destroy(FILE* file) {
+ ::fclose(file);
+}
+
+#ifdef _freebsd_ // fgetln
+#define getline getline_alt_4test
+#endif // _freebsd_
+
+// Reads one line from f into s, without the trailing '\n'.
+// Returns false only when the very first fgets fails (end of input or error);
+// a final line without a newline is still returned with true.
+// The first chunk is read directly into s when its capacity exceeds the 4096-byte
+// stack buffer; further chunks are appended until a newline or end of input.
+// Lengths are taken with strlen, so a NUL byte in the input ends a chunk early.
+bool getline(TFILEPtr& f, TString& s) {
+ char buf[4096];
+ char* buf_ptr;
+ if (s.capacity() > sizeof(buf)) {
+ s.resize(s.capacity());
+ if ((buf_ptr = fgets(s.begin(), IntegerCast<int>(s.capacity()), f)) == nullptr)
+ return false;
+ } else {
+ if ((buf_ptr = fgets(buf, sizeof(buf), f)) == nullptr)
+ return false;
+ }
+ size_t buf_len = strlen(buf_ptr);
+ bool line_complete = buf_len && buf_ptr[buf_len - 1] == '\n';
+ if (line_complete)
+ buf_len--;
+ // the data is already inside s when it was read in place; otherwise copy it from buf
+ if (buf_ptr == s.begin())
+ s.resize(buf_len);
+ else
+ s.AssignNoAlias(buf, buf_len);
+ if (line_complete)
+ return true;
+ // the line did not fit into the first chunk: keep appending until its newline
+ while (fgets(buf, sizeof(buf), f)) {
+ size_t buf_len2 = strlen(buf);
+ if (buf_len2 && buf[buf_len2 - 1] == '\n') {
+ buf[buf_len2 - 1] = 0;
+ s.append(buf, buf_len2 - 1);
+ return true;
+ }
+ s.append(buf, buf_len2);
+ }
+ return true;
+}
diff --git a/library/cpp/deprecated/fgood/fgood.h b/library/cpp/deprecated/fgood/fgood.h
new file mode 100644
index 0000000000..0aaf910c0f
--- /dev/null
+++ b/library/cpp/deprecated/fgood/fgood.h
@@ -0,0 +1,328 @@
+#pragma once
+
+#include <util/system/yassert.h>
+#include <util/system/defaults.h>
+#include <util/generic/string.h>
+#include <util/generic/yexception.h>
+#include <util/generic/ptr.h>
+
+#include "fput.h"
+
+#include <cstdio>
+
+#include <fcntl.h>
+
+#ifdef _unix_
+extern "C" int __ungetc(int, FILE*);
+#endif
+
+#if (!defined(__FreeBSD__) && !defined(__linux__) && !defined(_darwin_) && !defined(_cygwin_)) || defined(_bionic_)
+#define feof_unlocked(_stream) feof(_stream)
+#define ferror_unlocked(_stream) ferror(_stream)
+#endif
+
+#ifndef _unix_
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+#define getc_unlocked(_stream) (--(_stream)->_cnt >= 0 ? 0xff & *(_stream)->_ptr++ : _filbuf(_stream))
+#define putc_unlocked(_c, _stream) (--(_stream)->_cnt >= 0 ? 0xff & (*(_stream)->_ptr++ = (char)(_c)) : _flsbuf((_c), (_stream)))
+#else
+#define getc_unlocked(_stream) getc(_stream)
+#define putc_unlocked(_c, _stream) putc(_c, _stream)
+#endif
+#endif
+
+inline bool fgood(FILE* f) {
+ return !feof_unlocked(f) && !ferror_unlocked(f);
+}
+
+#ifdef _win32_
+// These functions will work only with static MSVC runtime linkage. For dynamic linkage,
+// fseeki64.c and ftelli64.c from CRT sources should be included in project
+extern "C" int __cdecl _fseeki64(FILE*, __int64, int);
+extern "C" __int64 __cdecl _ftelli64(FILE*);
+
+// Windows stand-in for ftello(): forwards to _ftelli64().
+inline i64 ftello(FILE* stream) {
+ return _ftelli64(stream);
+}
+
+// Windows stand-in for fseeko(): forwards to _fseeki64().
+inline int fseeko(FILE* stream, i64 offset, int origin) {
+ return _fseeki64(stream, offset, origin);
+}
+#endif
+
+// Wrapper around a C stdio FILE* that remembers a name for diagnostics and reports
+// most failures by throwing yexception.
+//
+// Ownership is tracked with flags: a stream obtained through open()/popen() or taken
+// from a raw FILE* is closed by close()/the destructor; a TFILEPtr copied from another
+// TFILEPtr only aliases the stream and never closes it (and does not copy the name).
+class TFILEPtr {
+private:
+    enum { SHOULD_CLOSE = 1,
+           IS_PIPE = 2 };
+    FILE* m_file;
+    int m_Flags;
+    TString Name;
+
+public:
+    TFILEPtr() noexcept {
+        m_file = nullptr;
+        m_Flags = 0;
+    }
+    // Opens `name` with the given mode; throws yexception on failure (see open()).
+    TFILEPtr(const TString& name, const char* mode) {
+        m_file = nullptr;
+        m_Flags = 0;
+        open(name, mode);
+    }
+    // Non-owning alias of src's stream: the copy will not close it.
+    TFILEPtr(const TFILEPtr& src) noexcept {
+        m_file = src.m_file;
+        m_Flags = 0;
+    }
+    // Closes the current stream (if owned) and becomes a non-owning alias of src's stream.
+    TFILEPtr& operator=(const TFILEPtr& src) {
+        if (src.m_file != m_file) {
+            close();
+            m_file = src.m_file;
+            m_Flags = 0;
+        }
+        return *this;
+    }
+    explicit TFILEPtr(FILE* f) noexcept { // take ownership
+        m_file = f;
+        m_Flags = SHOULD_CLOSE;
+    }
+    TFILEPtr& operator=(FILE* f) { // take ownership
+        if (f != m_file) {
+            close();
+            m_file = f;
+            m_Flags = SHOULD_CLOSE;
+        }
+        return *this;
+    }
+    const TString& name() const {
+        return Name;
+    }
+    operator FILE*() const noexcept {
+        return m_file;
+    }
+    FILE* operator->() const noexcept {
+        return m_file;
+    }
+    bool operator!() const noexcept {
+        return m_file == nullptr;
+    }
+    bool operator!=(FILE* f) const noexcept {
+        return m_file != f;
+    }
+    bool operator==(FILE* f) const noexcept {
+        return m_file == f;
+    }
+    ~TFILEPtr() {
+        close();
+    }
+    // Throws yexception built from the printf-style `message` when the stream is at
+    // end-of-file or has its error indicator set (see fgood()).
+    void Y_PRINTF_FORMAT(2, 3) check(const char* message, ...) const {
+        if (Y_UNLIKELY(!fgood(m_file))) {
+            char buf[512];
+            va_list args;
+            va_start(args, message);
+            vsnprintf(buf, sizeof(buf), message, args);
+            // va_start must be paired with va_end before the function is left via the throw below.
+            va_end(args);
+            // XXX: errno is undefined here
+            ythrow yexception() << buf << ": " << LastSystemErrorText() << ", " << Name.data() << " at offset " << (i64)ftell();
+        }
+    }
+    TFILEPtr& assign(FILE* f, const char* name = nullptr) { // take ownership and have a name
+        *this = f;
+        if (name)
+            Name = name;
+        return *this;
+    }
+    // Opens `name` with ::fopen; the object must not already hold a stream.
+    // Throws yexception when the file cannot be opened.
+    void open(const TString& name, const char* mode) {
+        Y_ASSERT(!name.empty());
+        Y_ASSERT(m_file == nullptr);
+        m_file = ::fopen(name.data(), mode);
+        if (!m_file)
+            ythrow yexception() << "can't open \'" << name << "\' with mode \'" << mode << "\': " << LastSystemErrorText();
+        m_Flags = SHOULD_CLOSE;
+        Name = name;
+    }
+    // Starts `command` with ::popen; the stream is later closed with ::pclose.
+    // Throws yexception when the command cannot be started.
+    void popen(const TString& command, const char* mode) {
+        Y_ASSERT(!command.empty());
+        Y_ASSERT(m_file == nullptr);
+        m_file = ::popen(command.data(), mode);
+        if (!m_file)
+            ythrow yexception() << "can't execute \'" << command << "\' with mode \'" << mode << "\': " << LastSystemErrorText();
+        m_Flags = IS_PIPE | SHOULD_CLOSE;
+        Name = command;
+    }
+    // Closes the stream when it is owned and resets the object to the empty state.
+    // A failing ::fclose/::pclose is reported with yexception unless another
+    // exception is already in flight.
+    void close() {
+        if (m_file != nullptr && (m_Flags & SHOULD_CLOSE)) {
+            if ((m_Flags & IS_PIPE) ? ::pclose(m_file) : ::fclose(m_file)) {
+                // Forget the stream before throwing so that it is never closed twice.
+                m_file = nullptr;
+                m_Flags = 0;
+                if (!UncaughtException())
+                    ythrow yexception() << "can't close file " << Name.data() << ": " << LastSystemErrorText();
+            }
+        }
+        m_file = nullptr;
+        m_Flags = 0;
+        Name.clear();
+    }
+    // ::fwrite followed by check(); returns the number of items written.
+    size_t write(const void* buffer, size_t size, size_t count) const {
+        Y_ASSERT(m_file != nullptr);
+        size_t r = ::fwrite(buffer, size, count, m_file);
+        // Cast the whole product: the argument has to be an unsigned long to match %lu.
+        check("can't write %lu bytes", (unsigned long)(size * count));
+        return r;
+    }
+    // ::fread; throws yexception when the stream error indicator is set afterwards.
+    size_t read(void* buffer, size_t size, size_t count) const {
+        Y_ASSERT(m_file != nullptr);
+        size_t r = ::fread(buffer, size, count, m_file);
+        if (ferror_unlocked(m_file))
+            ythrow yexception() << "can't read " << (unsigned long)size * count << " bytes: " << LastSystemErrorText() << ", " << Name.data() << " at offset " << (i64)ftell();
+        return r;
+    }
+    // ::fgets; throws yexception when the stream error indicator is set afterwards.
+    char* fgets(char* buffer, int size) const {
+        Y_ASSERT(m_file != nullptr);
+        char* r = ::fgets(buffer, size, m_file);
+        if (ferror_unlocked(m_file))
+            ythrow yexception() << "can't read string of maximum size " << size << ": " << LastSystemErrorText() << ", " << Name.data() << " at offset " << (i64)ftell();
+        return r;
+    }
+    // vfprintf to the stream followed by check().
+    void Y_PRINTF_FORMAT(2, 3) fprintf(const char* format, ...) {
+        Y_ASSERT(m_file != nullptr);
+        va_list args;
+        va_start(args, format);
+        vfprintf(m_file, format, args);
+        // Release the argument list before check(), which may throw.
+        va_end(args);
+        check("can't write");
+    }
+    // Repositions the stream; throws yexception on failure.
+    void seek(i64 offset, int origin) const {
+        Y_ASSERT(m_file != nullptr);
+#if defined(_unix_) || defined(_win32_)
+        if (fseeko(m_file, offset, origin) != 0)
+#else
+        Y_ASSERT(offset == (i64)(i32)offset);
+        if (::fseek(m_file, (long)offset, origin) != 0)
+#endif
+            ythrow yexception() << "can't seek " << Name.data() << " by " << offset << ": " << LastSystemErrorText();
+    }
+    i64 length() const; // uses various system headers -> in fgood.cpp
+
+    // Adds O_DIRECT to the file status flags of the underlying descriptor
+    // (compiled out when _win_ or _darwin_ is defined). Throws yexception on failure.
+    void setDirect() const {
+#if !defined(_win_) && !defined(_darwin_)
+        if (!m_file)
+            ythrow yexception() << "file not open";
+        // F_SETFL replaces the whole set of status flags, so start from the current
+        // ones instead of wiping flags such as O_APPEND.
+        const int fd = fileno(m_file);
+        const int currentFlags = fcntl(fd, F_GETFL);
+        if (currentFlags == -1 || fcntl(fd, F_SETFL, currentFlags | O_DIRECT) == -1)
+            ythrow yexception() << "Cannot set O_DIRECT flag";
+#endif
+    }
+
+    // for convenience
+
+    i64 ftell() const noexcept {
+#if defined(_unix_) || defined(_win32_)
+        return ftello(m_file);
+#else
+        // Qualified on purpose: an unqualified ftell would name this member function.
+        return ::ftell(m_file);
+#endif
+    }
+    bool eof() const noexcept {
+        Y_ASSERT(m_file != nullptr);
+        return feof_unlocked(m_file) != 0;
+    }
+    int fputc(int c) {
+        Y_ASSERT(m_file != nullptr);
+        return putc_unlocked(c, m_file);
+    }
+    size_t fputs(const char* buffer) const {
+        return write(buffer, strlen(buffer), 1);
+    }
+    int fgetc() {
+        Y_ASSERT(m_file != nullptr);
+        return getc_unlocked(m_file);
+    }
+    int ungetc(int c) {
+        Y_ASSERT(m_file != nullptr);
+        return ::ungetc(c, m_file);
+    }
+    template <class T>
+    size_t fput(const T& a) {
+        Y_ASSERT(m_file != nullptr);
+        return ::fput(m_file, a);
+    }
+    template <class T>
+    size_t fget(T& a) {
+        Y_ASSERT(m_file != nullptr);
+        return ::fget(m_file, a);
+    }
+    size_t fsput(const char* s, size_t l) {
+        Y_ASSERT(m_file != nullptr);
+        return ::fsput(m_file, s, l);
+    }
+    size_t fsget(char* s, size_t l) {
+        Y_ASSERT(m_file != nullptr);
+        return ::fsget(m_file, s, l);
+    }
+
+    void fflush() {
+        ::fflush(m_file);
+    }
+
+    /* This block contains some TFile/TStream - compatible names */
+    size_t Read(void* bufferIn, size_t numBytes) {
+        size_t r = fsget((char*)bufferIn, numBytes);
+        if (Y_UNLIKELY(ferror_unlocked(m_file)))
+            ythrow yexception() << "can't read " << numBytes << " bytes: " << LastSystemErrorText() << ", " << Name << " at offset " << (i64)ftell();
+        return r;
+    }
+    void Write(const void* buffer, size_t numBytes) {
+        write(buffer, 1, numBytes);
+    }
+    i64 Seek(i64 offset, int origin /*SeekDir*/) {
+        seek(offset, origin);
+        return ftell();
+    }
+    i64 GetPosition() const noexcept {
+        return ftell();
+    }
+    // NOTE(review): length() throws on failure while this wrapper is declared noexcept,
+    // so such a failure would terminate the program -- confirm this is intended.
+    i64 GetLength() const noexcept {
+        return length();
+    }
+    bool ReadLine(TString& st);
+
+    /* Similar to TAutoPtr::Release - return pointer and forget about it. */
+    FILE* Release() noexcept {
+        FILE* result = m_file;
+        m_file = nullptr;
+        m_Flags = 0;
+        Name.clear();
+        return result;
+    }
+};
+
+// Free-function spelling of TFILEPtr::close().
+inline void fclose(TFILEPtr& F) {
+ F.close();
+}
+
+// Free-function spelling of TFILEPtr::seek(); unlike ::fseek it returns nothing.
+inline void fseek(const TFILEPtr& F, i64 offset, int whence) {
+ F.seek(offset, whence);
+}
+
+#ifdef _freebsd_ // fgetln
+// fgetln()-based line reader: stores the next line of `f` in `s` without the
+// trailing '\n'. Returns false when fgetln() yields no data.
+inline bool getline(TFILEPtr& f, TString& s) {
+    size_t lineLen;
+    char* const line = fgetln(f, &lineLen);
+    if (line == nullptr) {
+        return false;
+    }
+    // Leave the newline terminator (if any) out of the copy.
+    const bool endsWithNewline = lineLen != 0 && line[lineLen - 1] == '\n';
+    s.AssignNoAlias(line, endsWithNewline ? lineLen - 1 : lineLen);
+    return true;
+}
+#else
+bool getline(TFILEPtr& f, TString& s);
+#endif //_freebsd_
+
+// Reads the next line of this stream into `st` by delegating to getline().
+inline bool TFILEPtr::ReadLine(TString& st) {
+ return getline(*this, st);
+}
+
+FILE* OpenFILEOrFail(const TString& name, const char* mode);
+
+// Deleter policy for FILE*; should be used with THolder (see TFILEHolder below).
+struct TFILECloser {
+ static void Destroy(FILE* file);
+};
+
+using TFILEHolder = THolder<FILE, TFILECloser>;
diff --git a/library/cpp/deprecated/fgood/fput.h b/library/cpp/deprecated/fgood/fput.h
new file mode 100644
index 0000000000..690b06332d
--- /dev/null
+++ b/library/cpp/deprecated/fgood/fput.h
@@ -0,0 +1,79 @@
+#pragma once
+
+#include <util/system/defaults.h>
+#include <util/system/valgrind.h>
+
+#include <cstdio>
+
+#ifdef __FreeBSD__
+#include <cstring>
+
+// Writes the raw bytes of `a` to F. When the stdio buffer has enough room the bytes
+// are copied straight into it; otherwise the call falls back to fwrite().
+// Returns the number of items written (1 on the fast path).
+template <class T>
+Y_FORCE_INLINE size_t fput(FILE* F, const T& a) {
+    if (Y_UNLIKELY(F->_w < int(sizeof(a)))) {
+        return fwrite(&a, sizeof(a), 1, F);
+    }
+    memcpy(F->_p, &a, sizeof(a));
+    F->_p += sizeof(a);
+    F->_w -= sizeof(a);
+    return 1;
+}
+
+// Reads sizeof(a) raw bytes from F into `a`. When the stdio buffer already holds
+// enough data the bytes are copied straight from it; otherwise the call falls back
+// to fread(). Returns the number of items read (1 on the fast path).
+template <class T>
+Y_FORCE_INLINE size_t fget(FILE* F, T& a) {
+    if (Y_UNLIKELY(F->_r < int(sizeof(a)))) {
+        return fread(&a, sizeof(a), 1, F);
+    }
+    memcpy(&a, F->_p, sizeof(a));
+    F->_p += sizeof(a);
+    F->_r -= sizeof(a);
+    return 1;
+}
+
+// Writes `l` bytes from `s` to F. When the stdio buffer has room the bytes are copied
+// straight into it; otherwise the call falls back to fwrite().
+// Returns the number of bytes written.
+inline size_t fsput(FILE* F, const char* s, size_t l) {
+    VALGRIND_CHECK_READABLE(s, l);
+
+    // _w is a signed counter. Test the sign before the unsigned comparison: a negative
+    // value converted to size_t would look like a huge amount of free space and let
+    // memcpy run past the buffer.
+    if (F->_w >= 0 && (size_t)F->_w >= l) {
+        memcpy(F->_p, s, l);
+        F->_p += l;
+        F->_w -= l;
+        return l;
+    } else {
+        return fwrite(s, 1, l, F);
+    }
+}
+
+// Reads up to `l` bytes from F into `s`. When the stdio buffer already holds enough
+// data the bytes are copied straight from it; otherwise the call falls back to fread().
+// Returns the number of bytes read.
+inline size_t fsget(FILE* F, char* s, size_t l) {
+    // _r is a signed counter. Test the sign before the unsigned comparison so that a
+    // negative value is never mistaken for a huge amount of buffered data
+    // (fget() above compares it as a signed value too).
+    if (F->_r >= 0 && (size_t)F->_r >= l) {
+        memcpy(s, F->_p, l);
+        F->_p += l;
+        F->_r -= l;
+        return l;
+    } else {
+        return fread(s, 1, l, F);
+    }
+}
+#else
+// Portable variant: writes the raw bytes of `a` with fwrite(); returns the item count (0 or 1).
+template <class T>
+Y_FORCE_INLINE size_t fput(FILE* F, const T& a) {
+ return fwrite(&a, sizeof(a), 1, F);
+}
+
+// Portable variant: reads sizeof(a) raw bytes into `a` with fread(); returns the item count (0 or 1).
+template <class T>
+Y_FORCE_INLINE size_t fget(FILE* F, T& a) {
+ return fread(&a, sizeof(a), 1, F);
+}
+
+// Portable variant: writes `l` bytes from `s` with fwrite(); returns the number of bytes written.
+inline size_t fsput(FILE* F, const char* s, size_t l) {
+#ifdef WITH_VALGRIND
+ VALGRIND_CHECK_READABLE(s, l);
+#endif
+ return fwrite(s, 1, l, F);
+}
+
+// Portable variant: reads up to `l` bytes into `s` with fread(); returns the number of bytes read.
+inline size_t fsget(FILE* F, char* s, size_t l) {
+ return fread(s, 1, l, F);
+}
+#endif
diff --git a/library/cpp/deprecated/fgood/ya.make b/library/cpp/deprecated/fgood/ya.make
new file mode 100644
index 0000000000..2394f9ad7a
--- /dev/null
+++ b/library/cpp/deprecated/fgood/ya.make
@@ -0,0 +1,8 @@
+LIBRARY()
+
+SRCS(
+ ffb.cpp
+ fgood.cpp
+)
+
+END()
diff --git a/library/cpp/deprecated/mapped_file/mapped_file.cpp b/library/cpp/deprecated/mapped_file/mapped_file.cpp
new file mode 100644
index 0000000000..b0e4511299
--- /dev/null
+++ b/library/cpp/deprecated/mapped_file/mapped_file.cpp
@@ -0,0 +1,64 @@
+#include "mapped_file.h"
+
+#include <util/generic/yexception.h>
+#include <util/system/defaults.h>
+#include <util/system/hi_lo.h>
+#include <util/system/filemap.h>
+
+// Stores `map` and maps its whole length starting at offset 0.
+// Throws yexception when size_t is no wider than 32 bits and the length does not fit into 32 bits;
+// `dbgName` is used only for that message.
+TMappedFile::TMappedFile(TFileMap* map, const char* dbgName) {
+    Map_ = map;
+    i64 fileLength = Map_->Length();
+    const bool narrowSizeT = sizeof(size_t) <= sizeof(ui32);
+    if (narrowSizeT && Hi32(fileLength) != 0) {
+        ythrow yexception() << "File '" << dbgName << "' mapping error: " << fileLength << " too large";
+    }
+
+    Map_->Map(0, static_cast<size_t>(fileLength));
+}
+
+// Starts with no mapping and then delegates to init(file, om, dbgName).
+TMappedFile::TMappedFile(const TFile& file, TFileMap::EOpenMode om, const char* dbgName)
+ : Map_(nullptr)
+{
+ init(file, om, dbgName);
+}
+
+// Forwards to TFileMap::Precharge(off, size); does nothing when there is no mapping.
+void TMappedFile::precharge(size_t off, size_t size) const {
+    if (Map_) {
+        Map_->Precharge(off, size);
+    }
+}
+
+// Maps the file `name`. The new mapping is built in a temporary object first, so *this
+// is only touched after the construction succeeded.
+void TMappedFile::init(const TString& name) {
+    THolder<TFileMap> fileMap(new TFileMap(name));
+    TMappedFile fresh(fileMap.Get(), name.data());
+    // `fresh` holds the raw pointer now, so the holder must not delete it.
+    Y_UNUSED(fileMap.Release());
+    fresh.swap(*this);
+    // After the swap `fresh` carries the previous state of *this.
+    fresh.term();
+}
+
+// Maps the file `name` using a TFileMap created with an explicit length and open mode.
+// The new mapping is built in a temporary object first, so *this is only touched after
+// the construction succeeded.
+void TMappedFile::init(const TString& name, size_t length, TFileMap::EOpenMode om) {
+    THolder<TFileMap> fileMap(new TFileMap(name, length, om));
+    TMappedFile fresh(fileMap.Get(), name.data());
+    // `fresh` holds the raw pointer now, so the holder must not delete it.
+    Y_UNUSED(fileMap.Release());
+    fresh.swap(*this);
+    // After the swap `fresh` carries the previous state of *this.
+    fresh.term();
+}
+
+// Maps an already opened `file` with the given open mode; `dbgName` is only used in
+// error messages. The new mapping is built in a temporary object first, so *this is
+// only touched after the construction succeeded.
+void TMappedFile::init(const TFile& file, TFileMap::EOpenMode om, const char* dbgName) {
+    THolder<TFileMap> fileMap(new TFileMap(file, om));
+    TMappedFile fresh(fileMap.Get(), dbgName);
+    // `fresh` holds the raw pointer now, so the holder must not delete it.
+    Y_UNUSED(fileMap.Release());
+    fresh.swap(*this);
+    // After the swap `fresh` carries the previous state of *this.
+    fresh.term();
+}
+
+// Maps the file `name` with the given open mode. The new mapping is built in a
+// temporary object first, so *this is only touched after the construction succeeded.
+void TMappedFile::init(const TString& name, TFileMap::EOpenMode om) {
+    THolder<TFileMap> fileMap(new TFileMap(name, om));
+    TMappedFile fresh(fileMap.Get(), name.data());
+    // `fresh` holds the raw pointer now, so the holder must not delete it.
+    Y_UNUSED(fileMap.Release());
+    fresh.swap(*this);
+    // After the swap `fresh` carries the previous state of *this.
+    fresh.term();
+}
+
+// Forwards to TFileMap::Flush(). Like precharge(), it is a no-op when there is no
+// mapping, instead of dereferencing a null pointer.
+void TMappedFile::flush() {
+    if (!Map_) {
+        return;
+    }
+
+    Map_->Flush();
+}
diff --git a/library/cpp/deprecated/mapped_file/ya.make b/library/cpp/deprecated/mapped_file/ya.make
new file mode 100644
index 0000000000..309341f1da
--- /dev/null
+++ b/library/cpp/deprecated/mapped_file/ya.make
@@ -0,0 +1,7 @@
+LIBRARY()
+
+SRCS(
+ mapped_file.cpp
+)
+
+END()
diff --git a/library/cpp/eventlog/common.h b/library/cpp/eventlog/common.h
new file mode 100644
index 0000000000..75c512c13e
--- /dev/null
+++ b/library/cpp/eventlog/common.h
@@ -0,0 +1,10 @@
+#pragma once
+
+// Abstract interface for reading a sequence of packets of type T.
+// NOTE(review): the exact contract is defined by the implementations; the
+// per-method notes below are inferred from the names -- confirm.
+template <class T>
+class TPacketInputStream {
+public:
+ virtual bool Avail() const = 0; // presumably: is there a current packet to read
+ virtual T operator*() const = 0; // presumably: the current packet
+ virtual bool Next() = 0; // presumably: advance; the bool reports success
+ virtual ~TPacketInputStream() = default;
+};
diff --git a/library/cpp/eventlog/evdecoder.cpp b/library/cpp/eventlog/evdecoder.cpp
new file mode 100644
index 0000000000..e4413a1b0e
--- /dev/null
+++ b/library/cpp/eventlog/evdecoder.cpp
@@ -0,0 +1,112 @@
+#include <util/memory/tempbuf.h>
+#include <util/string/cast.h>
+#include <util/stream/output.h>
+
+#include "evdecoder.h"
+#include "logparser.h"
+
+static const char* const UNKNOWN_EVENT_CLASS = "Unknown event class";
+
+// Reports a decoding problem found at `frameAddr`: throws yexception in strict mode,
+// otherwise prints a warning to Cerr.
+static inline void LogError(ui64 frameAddr, const char* msg, bool strict) {
+    if (strict) {
+        ythrow yexception() << "EventDecoder error @" << frameAddr << ": " << msg;
+    }
+    Cerr << "EventDecoder warning @" << frameAddr << ": " << msg << Endl;
+}
+
+// Skips `amount` bytes of `s`; true when exactly that many bytes were skipped.
+static inline bool SkipData(IInputStream& s, size_t amount) {
+    const size_t skipped = s.Skip(amount);
+    return skipped == amount;
+}
+
+// There are 2 log formats: one stores the event length, so an event can be skipped
+// without decoding it; the other requires decoding every event just to move along
+// the stream. needRead == true means the latter format.
+//
+// Reads the timestamp and class of the next event and, when required, its body.
+// Returns the event when the filter accepts it (a TUnknownEvent when the factory does
+// not know the class), or a null holder when the filter rejects it.
+static inline THolder<TEvent> DoDecodeEvent(IInputStream& s, const TEventFilter* const filter, const bool needRead, IEventFactory* fac) {
+    TEventTimestamp timestamp;
+    TEventClass eventClass;
+    THolder<TEvent> event;
+
+    ::Load(&s, timestamp);
+    ::Load(&s, eventClass);
+
+    const bool wanted = !filter || filter->EventAllowed(eventClass);
+    if (!needRead && !wanted) {
+        // Nothing has to be read or returned.
+        return event;
+    }
+
+    event.Reset(fac->CreateLogEvent(eventClass));
+    if (!!event) {
+        event->Timestamp = timestamp;
+        event->Load(s);
+    } else if (wanted) {
+        event.Reset(new TUnknownEvent(timestamp, eventClass));
+    }
+
+    if (!wanted) {
+        // The body was read only to advance the stream; drop it.
+        event.Reset(nullptr);
+    }
+
+    return event;
+}
+
+// Decodes one length-prefixed event from `inp`.
+// The stored length is checked against sizeof(ui32), and the event body is read through
+// a stream limited to the remaining `len - sizeof(ui32)` bytes.
+// Returns the decoded event, or a null holder when the event was rejected by the filter
+// or could not be decoded cleanly (in the latter case the rest of the event is skipped).
+// Throws TEventDecoderError for an invalid length or when the rest of a bad event
+// cannot be skipped; in strict mode LogError() throws instead of printing a warning.
+THolder<TEvent> DecodeFramed(IInputStream& inp, ui64 frameAddr, const TEventFilter* const filter, IEventFactory* fac, bool strict) {
+ ui32 len;
+ ::Load(&inp, len);
+
+ if (len < sizeof(ui32)) {
+ ythrow TEventDecoderError() << "invalid event length";
+ }
+
+ TLengthLimitedInput s(&inp, len - sizeof(ui32));
+
+ try {
+ THolder<TEvent> e = DoDecodeEvent(s, filter, false, fac);
+ if (!!e) {
+ if (!s.Left()) {
+ return e;
+ } else if (e->Class == 0) {
+ // Class 0 with leftover bytes: skip the leftover and still return the event.
+ if (!SkipData(s, s.Left())) {
+ ythrow TEventDecoderError() << "cannot skip bad event";
+ }
+
+ return e;
+ }
+
+ LogError(frameAddr, "Event is not fully read", strict);
+ }
+ } catch (const TLoadEOF&) {
+ // Bytes still left in the limited stream: rethrow and let the caller handle it.
+ if (s.Left()) {
+ throw;
+ }
+
+ LogError(frameAddr, "Unexpected event end", strict);
+ }
+
+ // Reached for filtered-out events and for events reported through LogError() above.
+ if (!SkipData(s, s.Left())) {
+ ythrow TEventDecoderError() << "cannot skip bad event";
+ }
+
+ return nullptr;
+}
+
+// Decodes the next event from `s`, either in the framed (length-prefixed) format or in
+// the format that requires reading every event. Returns a null holder for an event that
+// is not returned to the caller (for example, one rejected by the filter).
+// A TLoadEOF raised while decoding is converted into TEventDecoderError.
+THolder<TEvent> DecodeEvent(IInputStream& s, bool framed, ui64 frameAddr, const TEventFilter* const filter, IEventFactory* fac, bool strict) {
+    try {
+        if (!framed) {
+            THolder<TEvent> decoded = DoDecodeEvent(s, filter, true, fac);
+            // A null holder means the event was skipped by the filter. Not an error.
+            const bool unknownClass = !!decoded && decoded->Class == 0;
+            if (unknownClass) {
+                ythrow TEventDecoderError() << UNKNOWN_EVENT_CLASS;
+            }
+
+            return decoded;
+        }
+
+        return DecodeFramed(s, frameAddr, filter, fac, strict);
+    } catch (const TLoadEOF&) {
+        ythrow TEventDecoderError() << "unexpected frame end";
+    }
+}
diff --git a/library/cpp/eventlog/evdecoder.h b/library/cpp/eventlog/evdecoder.h
new file mode 100644
index 0000000000..eedfc82174
--- /dev/null
+++ b/library/cpp/eventlog/evdecoder.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <util/generic/yexception.h>
+#include <util/generic/ptr.h>
+
+#include "eventlog.h"
+
+class TEvent;
+class IInputStream;
+class TEventFilter;
+
+// Exception type thrown by the event decoding functions (see DecodeEvent below).
+struct TEventDecoderError: public yexception {
+};
+
+THolder<TEvent> DecodeEvent(IInputStream& s, bool framed, ui64 frameAddr, const TEventFilter* const filter, IEventFactory* fac, bool strict = false);
+bool AcceptableContent(TEventLogFormat);
diff --git a/library/cpp/eventlog/event_field_output.cpp b/library/cpp/eventlog/event_field_output.cpp
new file mode 100644
index 0000000000..f9d98dac9d
--- /dev/null
+++ b/library/cpp/eventlog/event_field_output.cpp
@@ -0,0 +1,68 @@
+#include "event_field_output.h"
+
+#include <util/string/split.h>
+
+namespace {
+    // Builds the set of characters that have to be escaped for the given flags:
+    // '\t' for EscapeTab, '\n' and '\r' for EscapeNewLine, '\\' for EscapeBackSlash.
+    TString MakeSeparators(EFieldOutputFlags flags) {
+        TString res;
+        // Up to four characters can be appended below.
+        res.reserve(4);
+
+        if (flags & EFieldOutputFlag::EscapeTab) {
+            res.append('\t');
+        }
+        if (flags & EFieldOutputFlag::EscapeNewLine) {
+            res.append('\n');
+            res.append('\r');
+        }
+        if (flags & EFieldOutputFlag::EscapeBackSlash) {
+            res.append('\\');
+        }
+
+        return res;
+    }
+}
+
+// Wraps `output`; the set of characters to escape is computed once from `flags`.
+TEventFieldOutput::TEventFieldOutput(IOutputStream& output, EFieldOutputFlags flags)
+ : Output(output)
+ , Flags(flags)
+ , Separators(MakeSeparators(flags))
+{
+}
+
+// Returns the wrapped stream; writing to it directly bypasses the escaping.
+IOutputStream& TEventFieldOutput::GetOutputStream() {
+ return Output;
+}
+
+// Returns the escaping flags this object was constructed with.
+EFieldOutputFlags TEventFieldOutput::GetFlags() const {
+ return Flags;
+}
+
+// Writes `len` bytes of `buf` to the wrapped stream. With no flags set the data is
+// passed through unchanged; otherwise it is split on the characters from Separators
+// and each separator found is replaced by its two-character escape sequence.
+void TEventFieldOutput::DoWrite(const void* buf, size_t len) {
+ if (!Flags) {
+ Output.Write(buf, len);
+ return;
+ }
+
+ TStringBuf chunk{static_cast<const char*>(buf), len};
+
+ for (const auto part : StringSplitter(chunk).SplitBySet(Separators.data())) {
+ TStringBuf token = part.Token();
+ TStringBuf delim = part.Delim();
+
+ if (!token.empty()) {
+ Output.Write(token);
+ }
+ // Emit the escaped form of the delimiter that ended this token.
+ if ("\n" == delim) {
+ Output.Write(TStringBuf("\\n"));
+ } else if ("\r" == delim) {
+ Output.Write(TStringBuf("\\r"));
+ } else if ("\t" == delim) {
+ Output.Write(TStringBuf("\\t"));
+ } else if ("\\" == delim) {
+ Output.Write(TStringBuf("\\\\"));
+ } else {
+ // Any other delimiter is expected to be empty.
+ Y_ASSERT(delim.empty());
+ }
+ }
+}
+
diff --git a/library/cpp/eventlog/event_field_output.h b/library/cpp/eventlog/event_field_output.h
new file mode 100644
index 0000000000..ed9db0ae16
--- /dev/null
+++ b/library/cpp/eventlog/event_field_output.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <util/stream/output.h>
+#include <util/generic/flags.h>
+
+// Bit flags selecting which characters TEventFieldOutput escapes in the data written through it.
+enum class EFieldOutputFlag {
+ EscapeTab = 0x1, // escape \t in field value
+ EscapeNewLine = 0x2, // escape \n and \r in field value
+ EscapeBackSlash = 0x4 // escape \ in field value
+};
+
+Y_DECLARE_FLAGS(EFieldOutputFlags, EFieldOutputFlag);
+Y_DECLARE_OPERATORS_FOR_FLAGS(EFieldOutputFlags);
+
+// Output stream adapter that escapes selected characters (see EFieldOutputFlag)
+// before forwarding the data to another stream.
+class TEventFieldOutput: public IOutputStream {
+public:
+ // `output` is held by reference.
+ TEventFieldOutput(IOutputStream& output, EFieldOutputFlags flags);
+
+ IOutputStream& GetOutputStream();
+ EFieldOutputFlags GetFlags() const;
+
+protected:
+ void DoWrite(const void* buf, size_t len) override;
+
+private:
+ IOutputStream& Output;
+ EFieldOutputFlags Flags;
+ TString Separators; // characters to escape, derived from Flags
+};
diff --git a/library/cpp/eventlog/event_field_printer.cpp b/library/cpp/eventlog/event_field_printer.cpp
new file mode 100644
index 0000000000..29c6b4b661
--- /dev/null
+++ b/library/cpp/eventlog/event_field_printer.cpp
@@ -0,0 +1,27 @@
+#include "event_field_printer.h"
+
+#include <library/cpp/protobuf/json/proto2json.h>
+
+namespace {
+
+ // Proto2Json settings used when a protobuf message field is printed as JSON:
+ // MissingKeyDefault mode for missing repeated keys, plus a string transform that
+ // base64-encodes bytes.
+ const NProtobufJson::TProto2JsonConfig PROTO_2_JSON_CONFIG = NProtobufJson::TProto2JsonConfig()
+ .SetMissingRepeatedKeyMode(NProtobufJson::TProto2JsonConfig::MissingKeyDefault)
+ .AddStringTransform(MakeIntrusive<NProtobufJson::TBase64EncodeBytesTransform>());
+
+} // namespace
+
+// Remembers the print mode used by PrintProtobufMessageFieldToOutput().
+TEventProtobufMessageFieldPrinter::TEventProtobufMessageFieldPrinter(EProtobufMessageFieldPrintMode mode)
+ : Mode(mode)
+{}
+
+// Specialization for a generic protobuf message: both print modes write the field as
+// JSON into `output`.
+template <>
+void TEventProtobufMessageFieldPrinter::PrintProtobufMessageFieldToOutput<google::protobuf::Message, false>(const google::protobuf::Message& field, TEventFieldOutput& output) {
+ switch (Mode) {
+ case EProtobufMessageFieldPrintMode::DEFAULT:
+ case EProtobufMessageFieldPrintMode::JSON: {
+ // Do not use field.PrintJSON() here: IGNIETFERRO-2002
+ NProtobufJson::Proto2Json(field, output, PROTO_2_JSON_CONFIG);
+ break;
+ }
+ }
+}
diff --git a/library/cpp/eventlog/event_field_printer.h b/library/cpp/eventlog/event_field_printer.h
new file mode 100644
index 0000000000..835e8f4a85
--- /dev/null
+++ b/library/cpp/eventlog/event_field_printer.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include "event_field_output.h"
+
+#include <google/protobuf/message.h>
+
+// NB: For historical reasons print code for all primitive types/repeated fields/etc generated by https://a.yandex-team.ru/arc/trunk/arcadia/tools/event2cpp
+
+// Selects how protobuf message fields of an event are printed.
+enum class EProtobufMessageFieldPrintMode {
+ // Use the <TEventProtobufMessageFieldType>::Print method for fields that have it;
+ // print JSON for other fields.
+ DEFAULT = 0,
+
+ // Always print JSON.
+ JSON = 1,
+};
+
+// Prints protobuf message fields of events according to the configured mode.
+class TEventProtobufMessageFieldPrinter {
+public:
+ explicit TEventProtobufMessageFieldPrinter(EProtobufMessageFieldPrintMode mode);
+
+ // Prints `field` into `output`. When the field type has its own Print function
+ // (HasPrintFunction) and the mode is DEFAULT, that function is called with the
+ // underlying stream and escaping flags of `output`; in every other case the generic
+ // google::protobuf::Message specialization is used.
+ template <typename TEventProtobufMessageFieldType, bool HasPrintFunction>
+ void PrintProtobufMessageFieldToOutput(const TEventProtobufMessageFieldType& field, TEventFieldOutput& output) {
+ if constexpr (HasPrintFunction) {
+ if (Mode == EProtobufMessageFieldPrintMode::DEFAULT) {
+ field.Print(output.GetOutputStream(), output.GetFlags());
+ return;
+ }
+ }
+
+ PrintProtobufMessageFieldToOutput<google::protobuf::Message, false>(field, output);
+ }
+
+ // Generic fallback; defined in event_field_printer.cpp.
+ template <>
+ void PrintProtobufMessageFieldToOutput<google::protobuf::Message, false>(const google::protobuf::Message& field, TEventFieldOutput& output);
+
+private:
+ EProtobufMessageFieldPrintMode Mode;
+};
diff --git a/library/cpp/eventlog/eventlog.cpp b/library/cpp/eventlog/eventlog.cpp
new file mode 100644
index 0000000000..458a632b4a
--- /dev/null
+++ b/library/cpp/eventlog/eventlog.cpp
@@ -0,0 +1,554 @@
+#include <util/datetime/base.h>
+#include <util/stream/zlib.h>
+#include <util/stream/length.h>
+#include <util/generic/buffer.h>
+#include <util/generic/yexception.h>
+#include <util/digest/murmur.h>
+#include <util/generic/singleton.h>
+#include <util/generic/function.h>
+#include <util/stream/output.h>
+#include <util/stream/format.h>
+#include <util/stream/null.h>
+
+#include <google/protobuf/messagext.h>
+
+#include "eventlog.h"
+#include "events_extension.h"
+#include "evdecoder.h"
+#include "logparser.h"
+#include <library/cpp/eventlog/proto/internal.pb.h>
+
+#include <library/cpp/json/json_writer.h>
+#include <library/cpp/protobuf/json/proto2json.h>
+
+
+// Global counter that GenerateFrameId() below increments atomically.
+TAtomic eventlogFrameCounter = 0;
+
+namespace {
+
+ // Proto2Json settings used when an event body is printed as JSON:
+ // MissingKeyDefault mode for missing repeated keys, plus a string transform that
+ // base64-encodes bytes.
+ const NProtobufJson::TProto2JsonConfig PROTO_2_JSON_CONFIG = NProtobufJson::TProto2JsonConfig()
+ .SetMissingRepeatedKeyMode(NProtobufJson::TProto2JsonConfig::MissingKeyDefault)
+ .AddStringTransform(MakeIntrusive<NProtobufJson::TBase64EncodeBytesTransform>());
+
+ // Atomically increments the global frame counter and returns the result converted to ui32.
+ ui32 GenerateFrameId() {
+ return ui32(AtomicAdd(eventlogFrameCounter, 1));
+ }
+
+ // Shared TUnknownEvent message instance, used when an event has no message of its own.
+ inline const NProtoBuf::Message* UnknownEventMessage() {
+ return Singleton<NEventLogInternal::TUnknownEvent>();
+ }
+
+} // namespace
+
+// Prints the event to `out` in the format selected by options.OutputFormat:
+// a JSON map, or a header followed by the event body (raw, or with newlines and
+// backslashes escaped). Any other format value prints nothing.
+void TEvent::Print(IOutputStream& out, const TOutputOptions& options, const TEventState& eventState) const {
+    const auto& format = options.OutputFormat;
+
+    if (format == TOutputFormat::Json) {
+        NJson::TJsonWriterConfig jsonWriterConfig;
+        jsonWriterConfig.FormatOutput = 0;
+        NJson::TJsonWriter jsonWriter(&out, jsonWriterConfig);
+
+        jsonWriter.OpenMap();
+        PrintJsonHeader(jsonWriter);
+        DoPrintJson(jsonWriter);
+        jsonWriter.CloseMap();
+        return;
+    }
+
+    const bool raw = format == TOutputFormat::TabSeparatedRaw;
+    if (!raw && !(format == TOutputFormat::TabSeparated)) {
+        return;
+    }
+
+    PrintHeader(out, options, eventState);
+    if (raw) {
+        DoPrint(out, {});
+    } else {
+        DoPrint(
+            out,
+            EFieldOutputFlags{} | EFieldOutputFlag::EscapeNewLine | EFieldOutputFlag::EscapeBackSlash);
+    }
+}
+
+// Prints the tab-separated event header: the timestamp and the frame id.
+// In human-readable mode the timestamp is formatted as a date and is followed by the
+// offset from the frame start and, when it is not negative, the offset from the
+// previous event.
+void TEvent::PrintHeader(IOutputStream& out, const TOutputOptions& options, const TEventState& eventState) const {
+    if (!options.HumanReadable) {
+        out << static_cast<TEventTimestamp>(Timestamp);
+        out << '\t' << FrameId << '\t';
+        return;
+    }
+
+    out << TInstant::MicroSeconds(Timestamp).ToString() << "\t";
+
+    const auto frameStart = eventState.FrameStartTime;
+    if (Timestamp >= frameStart) {
+        out << "+" << HumanReadable(TDuration::MicroSeconds(Timestamp - frameStart));
+    } else { // a bug somewhere? anyway, let's handle it in a nice fashion
+        out << "-" << HumanReadable(TDuration::MicroSeconds(frameStart - Timestamp));
+    }
+
+    // Events may be async and out-of-order; a negative diff to the previous event makes no sense, so it is skipped.
+    if (Timestamp >= eventState.PrevEventTime) {
+        out << " (+" << HumanReadable(TDuration::MicroSeconds(Timestamp - eventState.PrevEventTime)) << ")";
+    }
+
+    out << "\tF# " << FrameId << '\t';
+}
+
+// Writes the "Timestamp" and "FrameId" entries into the JSON map currently open in `jsonWriter`.
+void TEvent::PrintJsonHeader(NJson::TJsonWriter& jsonWriter) const {
+ jsonWriter.Write("Timestamp", Timestamp);
+ jsonWriter.Write("FrameId", FrameId);
+}
+
+// TEvent implementation backed by a protobuf message.
+// Message_ either points at a caller-provided message (first constructor) or at
+// InnerMsg_, which the event creates through the event factory (third constructor).
+class TProtobufEvent: public TEvent {
+public:
+ // Wraps an existing message; only its address is stored, so `msg` has to outlive the event.
+ TProtobufEvent(TEventTimestamp t, size_t eventId, const NProtoBuf::Message& msg)
+ : TEvent(eventId, t)
+ , Message_(&msg)
+ , EventFactory_(NProtoBuf::TEventFactory::Instance())
+ {
+ }
+
+ // Event with class 0, timestamp 0 and no message.
+ TProtobufEvent()
+ : TEvent(0, 0)
+ , EventFactory_(NProtoBuf::TEventFactory::Instance())
+ {
+ }
+
+ // Creates the message for class `id` through `eventFactory`.
+ // NOTE(review): Message_ stays null if CreateEvent() returns null -- confirm whether that can happen.
+ explicit TProtobufEvent(ui32 id, NProtoBuf::TEventFactory* eventFactory = NProtoBuf::TEventFactory::Instance())
+ : TEvent(id, 0)
+ , EventFactory_(eventFactory)
+ {
+ InnerMsg_.Reset(EventFactory_->CreateEvent(Class));
+ Message_ = InnerMsg_.Get();
+ }
+
+ ui32 Id() const {
+ return Class;
+ }
+
+ // Parses the message from `in`; without an inner message the input is drained into Cnull.
+ void Load(IInputStream& in) override {
+ if (!!InnerMsg_) {
+ InnerMsg_->ParseFromArcadiaStream(&in);
+ } else {
+ TransferData(&in, &Cnull);
+ }
+ }
+
+ void Save(IOutputStream& out) const override {
+ Message_->SerializeToArcadiaStream(&out);
+ }
+
+ // Serializes the message directly into the space appended to the end of `buf`.
+ void SaveToBuffer(TBufferOutput& buf) const override {
+ size_t messageSize = Message_->ByteSize();
+ size_t before = buf.Buffer().Size();
+ buf.Buffer().Advance(messageSize);
+ Y_PROTOBUF_SUPPRESS_NODISCARD Message_->SerializeToArray(buf.Buffer().Data() + before, messageSize);
+ }
+
+ TStringBuf GetName() const override {
+ return EventFactory_->NameById(Id());
+ }
+
+private:
+ void DoPrint(IOutputStream& out, EFieldOutputFlags flags) const override {
+ EventFactory_->PrintEvent(Id(), Message_, out, flags);
+ }
+ // Writes an "EventBody" map holding the event type name and the message fields as JSON.
+ void DoPrintJson(NJson::TJsonWriter& jsonWriter) const override {
+ jsonWriter.OpenMap("EventBody");
+ jsonWriter.Write("Type", GetName());
+
+ jsonWriter.Write("Fields");
+ NProtobufJson::Proto2Json(*GetProto(), jsonWriter, PROTO_2_JSON_CONFIG);
+
+ jsonWriter.CloseMap();
+ }
+
+ // Returns the wrapped message, or the shared unknown-event message when there is none.
+ const NProtoBuf::Message* GetProto() const override {
+ if (Message_) {
+ return Message_;
+ }
+
+ return UnknownEventMessage();
+ }
+
+private:
+ const NProtoBuf::Message* Message_ = nullptr;
+ NProtoBuf::TEventFactory* EventFactory_;
+ THolder<NProtoBuf::Message> InnerMsg_;
+
+ friend class TEventLogFrame;
+};
+
+// Logs `ev` with the current time (in microseconds) as its timestamp.
+void TEventLogFrame::LogProtobufEvent(size_t eventId, const NProtoBuf::Message& ev) {
+    const TEventTimestamp now = Now().MicroSeconds();
+    TProtobufEvent stamped(now, eventId, ev);
+
+    LogEventImpl(stamped);
+}
+
+// Logs `ev` with an explicitly provided timestamp.
+void TEventLogFrame::LogProtobufEvent(TEventTimestamp timestamp, size_t eventId, const NProtoBuf::Message& ev) {
+ TProtobufEvent event(timestamp, eventId, ev);
+
+ LogEventImpl(event);
+}
+
+// Dumps the event (timestamp, name and message as JSON) to Cerr as one line.
+// A function-local mutex is held for the whole line.
+template <>
+void TEventLogFrame::DebugDump(const TProtobufEvent& ev) {
+    static TMutex lock;
+
+    TGuard<TMutex> guard(lock);
+    Cerr << ev.Timestamp << "\t" << ev.GetName() << "\t";
+    ev.GetProto()->PrintJSON(Cerr);
+    Cerr << Endl;
+}
+
+// Layout of a frame header: the sync marker followed by the two header structures.
+// Packed to 1-byte alignment, so no padding is inserted between the members.
+#pragma pack(push, 1)
+struct TFrameHeaderData {
+ char SyncField[COMPRESSED_LOG_FRAME_SYNC_DATA.size()];
+ TCompressedFrameBaseHeader Header;
+ TCompressedFrameHeader2 HeaderEx;
+};
+#pragma pack(pop)
+
+// Creates a frame attached to `parentLog`; a log with a null backend is treated as no log at all.
+TEventLogFrame::TEventLogFrame(IEventLog& parentLog, bool needAlwaysSafeAdd, TWriteFrameCallbackPtr writeFrameCallback)
+ : EvLog_(parentLog.HasNullBackend() ? nullptr : &parentLog)
+ , NeedAlwaysSafeAdd_(needAlwaysSafeAdd)
+ , ForceDump_(false)
+ , WriteFrameCallback_(std::move(writeFrameCallback))
+{
+ DoInit();
+}
+
+// Creates a frame attached to `parentLog`, which may be null; a log with a null backend
+// is treated as no log at all.
+TEventLogFrame::TEventLogFrame(IEventLog* parentLog, bool needAlwaysSafeAdd, TWriteFrameCallbackPtr writeFrameCallback)
+    : EvLog_((parentLog && parentLog->HasNullBackend()) ? nullptr : parentLog)
+    , NeedAlwaysSafeAdd_(needAlwaysSafeAdd)
+    , ForceDump_(false)
+    , WriteFrameCallback_(std::move(writeFrameCallback))
+{
+    DoInit();
+}
+
+// Creates a frame that is not attached to any log.
+TEventLogFrame::TEventLogFrame(bool needAlwaysSafeAdd, TWriteFrameCallbackPtr writeFrameCallback)
+ : EvLog_(nullptr)
+ , NeedAlwaysSafeAdd_(needAlwaysSafeAdd)
+ , ForceDump_(false)
+ , WriteFrameCallback_(std::move(writeFrameCallback))
+{
+ DoInit();
+}
+
+// Writes the accumulated events to the parent log as one frame and resets the frame.
+// Does nothing when the frame has no log or no buffered data.
+void TEventLogFrame::Flush() {
+    if (EvLog_ == nullptr) {
+        return;
+    }
+
+    TBuffer& pending = Buf_.Buffer();
+    if (!pending.Empty()) {
+        EvLog_->WriteFrame(pending, StartTimestamp_, EndTimestamp_, WriteFrameCallback_, std::move(MetaFlags_));
+
+        DoInit();
+    }
+}
+
+// Flush() performed while holding the frame mutex.
+void TEventLogFrame::SafeFlush() {
+ TGuard<TMutex> g(Mtx_);
+ Flush();
+}
+
+// Extends the [StartTimestamp_, EndTimestamp_] range of the frame to include `timestamp`.
+void TEventLogFrame::AddEvent(TEventTimestamp timestamp) {
+ if (timestamp < StartTimestamp_) {
+ StartTimestamp_ = timestamp;
+ }
+
+ if (timestamp > EndTimestamp_) {
+ EndTimestamp_ = timestamp;
+ }
+}
+
+// Resets the frame: clears the buffer and sets the timestamp range to "empty"
+// (start at the maximum value, end at 0), so that the first AddEvent() sets both bounds.
+void TEventLogFrame::DoInit() {
+ Buf_.Buffer().Clear();
+
+ StartTimestamp_ = (TEventTimestamp)-1;
+ EndTimestamp_ = 0;
+}
+
+// Decodes the events currently buffered in the frame and passes each of them to
+// `visitor`. The frame mutex is held during the walk when NeedAlwaysSafeAdd_ is set.
+// Events for which DecodeEvent() yields no object are skipped.
+void TEventLogFrame::VisitEvents(ILogFrameEventVisitor& visitor, IEventFactory* eventFactory) {
+    const auto doVisit = [this, &visitor, eventFactory]() {
+        TBuffer& buf = Buf_.Buffer();
+
+        TBufferInput bufferInput(buf);
+        TLengthLimitedInput limitedInput(&bufferInput, buf.size());
+
+        TEventFilter EventFilter(false);
+
+        while (limitedInput.Left()) {
+            THolder<TEvent> event = DecodeEvent(limitedInput, true, 0, &EventFilter, eventFactory);
+
+            // DecodeEvent() may return a null holder (e.g. for an event that could not
+            // be decoded cleanly); dereferencing it would be undefined behaviour.
+            if (!!event) {
+                visitor.Visit(*event);
+            }
+        }
+    };
+    if (NeedAlwaysSafeAdd_) {
+        TGuard<TMutex> g(Mtx_);
+        doVisit();
+    } else {
+        doVisit();
+    }
+}
+
+// The three constructors mirror those of TEventLogFrame and only forward their arguments.
+TSelfFlushLogFrame::TSelfFlushLogFrame(IEventLog& parentLog, bool needAlwaysSafeAdd, TWriteFrameCallbackPtr writeFrameCallback)
+ : TEventLogFrame(parentLog, needAlwaysSafeAdd, std::move(writeFrameCallback))
+{
+}
+
+TSelfFlushLogFrame::TSelfFlushLogFrame(IEventLog* parentLog, bool needAlwaysSafeAdd, TWriteFrameCallbackPtr writeFrameCallback)
+ : TEventLogFrame(parentLog, needAlwaysSafeAdd, std::move(writeFrameCallback))
+{
+}
+
+TSelfFlushLogFrame::TSelfFlushLogFrame(bool needAlwaysSafeAdd, TWriteFrameCallbackPtr writeFrameCallback)
+ : TEventLogFrame(needAlwaysSafeAdd, std::move(writeFrameCallback))
+{
+}
+
+// Flushes the frame on destruction. Exceptions from Flush() are swallowed on purpose:
+// a destructor must not throw.
+TSelfFlushLogFrame::~TSelfFlushLogFrame() {
+ try {
+ Flush();
+ } catch (...) {
+ }
+}
+
+// Out-of-line definition of the (empty) interface destructor.
+IEventLog::~IEventLog() {
+}
+
+// Creates the log backend for `fileName`: a sync-page-cache backend or a plain file
+// backend (depending on `backendOpts`), wrapped into TReopenLogBackend.
+// If anything throws, a warning is written to Cdbg and a null backend is returned
+// instead, so the caller never sees the failure.
+static THolder<TLogBackend> ConstructBackend(const TString& fileName, const TEventLogBackendOptions& backendOpts) {
+ try {
+ THolder<TLogBackend> backend;
+ if (backendOpts.UseSyncPageCacheBackend) {
+ backend = MakeHolder<TSyncPageCacheFileLogBackend>(fileName, backendOpts.SyncPageCacheBackendBufferSize, backendOpts.SyncPageCacheBackendMaxPendingSize);
+ } else {
+ backend = MakeHolder<TFileLogBackend>(fileName);
+ }
+ return MakeHolder<TReopenLogBackend>(std::move(backend));
+ } catch (...) {
+ Cdbg << "Warning: Cannot open event log '" << fileName << "': " << CurrentExceptionMessage() << "." << Endl;
+ }
+
+ return MakeHolder<TNullLogBackend>();
+}
+
+// Opens a file-backed event log.
+//
+// fileName, backendOpts - passed to ConstructBackend().
+// contentFormat - stored as ContentFormat_; its top byte must be zero.
+// logFormat - frame format; COMPRESSED_LOG_FORMAT_V4 is used when not defined.
+//
+// Throws if the resulting log format is neither V4 nor V5, or if the top byte of
+// contentFormat is non-zero.
+TEventLog::TEventLog(const TString& fileName, TEventLogFormat contentFormat, const TEventLogBackendOptions& backendOpts, TMaybe<TEventLogFormat> logFormat)
+ : Log_(ConstructBackend(fileName, backendOpts))
+ , ContentFormat_(contentFormat)
+ , LogFormat_(logFormat.Defined() ? *logFormat : COMPRESSED_LOG_FORMAT_V4)
+ , HasNullBackend_(Log_.IsNullLog())
+ , Lz4hcCodec_(NBlockCodecs::Codec("lz4hc"))
+ , ZstdCodec_(NBlockCodecs::Codec("zstd_1"))
+{
+ Y_ENSURE(LogFormat_ == COMPRESSED_LOG_FORMAT_V4 || LogFormat_ == COMPRESSED_LOG_FORMAT_V5);
+
+ // WriteFrame() packs the log format into the top byte of the header's Format field.
+ if (contentFormat & 0xff000000) {
+ ythrow yexception() << "wrong compressed event log content format code (" << contentFormat << ")";
+ }
+}
+
+// Convenience overload: delegates to the four-argument constructor with the
+// log format fixed to COMPRESSED_LOG_FORMAT_V4.
+TEventLog::TEventLog(const TString& fileName, TEventLogFormat contentFormat, const TEventLogBackendOptions& backendOpts)
+ : TEventLog(fileName, contentFormat, backendOpts, COMPRESSED_LOG_FORMAT_V4)
+{
+}
+
+// Creates an event log on top of an existing TLog (copied into Log_).
+//
+// Throws if the top byte of contentFormat is non-zero. logFormat is not checked
+// here; WriteFrame() rejects anything other than V4/V5.
+TEventLog::TEventLog(const TLog& log, TEventLogFormat contentFormat, TEventLogFormat logFormat)
+ : Log_(log)
+ , ContentFormat_(contentFormat)
+ , LogFormat_(logFormat)
+ , HasNullBackend_(Log_.IsNullLog())
+ , Lz4hcCodec_(NBlockCodecs::Codec("lz4hc"))
+ , ZstdCodec_(NBlockCodecs::Codec("zstd_1"))
+{
+ if (contentFormat & 0xff000000) {
+ ythrow yexception() << "wrong compressed event log content format code (" << contentFormat << ")";
+ }
+}
+
+// Creates an event log backed by TNullLogBackend; HasNullBackend() reports true.
+//
+// Throws if the top byte of contentFormat is non-zero. logFormat is not checked
+// here; WriteFrame() rejects anything other than V4/V5.
+TEventLog::TEventLog(TEventLogFormat contentFormat, TEventLogFormat logFormat)
+ : Log_(MakeHolder<TNullLogBackend>())
+ , ContentFormat_(contentFormat)
+ , LogFormat_(logFormat)
+ , HasNullBackend_(true)
+ , Lz4hcCodec_(NBlockCodecs::Codec("lz4hc"))
+ , ZstdCodec_(NBlockCodecs::Codec("zstd_1"))
+{
+ if (contentFormat & 0xff000000) {
+ ythrow yexception() << "wrong compressed event log content format code (" << contentFormat << ")";
+ }
+}
+
+// Nothing to do explicitly; members release their own resources.
+TEventLog::~TEventLog() {
+}
+
+// Forwards the reopen request to the underlying TLog.
+void TEventLog::ReopenLog() {
+ Log_.ReopenLog();
+}
+
+// Forwards the close request to the underlying TLog.
+void TEventLog::CloseLog() {
+ Log_.CloseLog();
+}
+
+// Intentionally a no-op in this implementation: WriteFrame() hands each frame
+// to Log_ directly and nothing is buffered in TEventLog itself.
+void TEventLog::Flush() {
+}
+
+namespace {
+    // Scope guard that runs a callback only when its scope is left because of an
+    // exception thrown after the guard was created.
+    //
+    // The number of in-flight exceptions is recorded at construction and compared
+    // in the destructor. Merely asking "is any exception active?" would also fire
+    // when the guard is created and destroyed normally inside a destructor that
+    // itself runs during stack unwinding (for example a frame being flushed while
+    // an unrelated exception propagates), reporting a failure that never happened.
+    class TOnExceptionAction {
+    public:
+        // f - action to run on exceptional scope exit; an empty function disables the guard.
+        TOnExceptionAction(std::function<void()>&& f)
+            : F_(std::move(f))
+            , ExceptionsOnEntry_(std::uncaught_exceptions())
+        {
+        }
+
+        ~TOnExceptionAction() {
+            if (F_ && std::uncaught_exceptions() > ExceptionsOnEntry_) {
+                try {
+                    F_();
+                } catch (...) {
+                    // A destructor must not throw, least of all during unwinding.
+                }
+            }
+        }
+
+    private:
+        std::function<void()> F_;
+        // Value of std::uncaught_exceptions() when the guard was constructed.
+        int ExceptionsOnEntry_;
+    };
+}
+
+// Compresses `buffer` into one frame, prepends a TFrameHeaderData header and
+// writes the result to Log_.
+//
+// buffer - serialized events that make up the frame payload.
+// startTimestamp, endTimestamp - stored verbatim in the frame header.
+// writeFrameCallback - optional; receives the finished frame before it is written.
+// metaFlags - passed through to Log_.Write().
+//
+// Throws (Y_ENSURE) if LogFormat_ is neither V4 nor V5. If an exception escapes
+// once the frame has been built, ErrorCallback_ (when set) is notified; after a
+// successful write SuccessCallback_ (when set) receives the frame.
+void TEventLog::WriteFrame(TBuffer& buffer,
+ TEventTimestamp startTimestamp,
+ TEventTimestamp endTimestamp,
+ TWriteFrameCallbackPtr writeFrameCallback,
+ TLogRecord::TMetaFlags metaFlags) {
+ Y_ENSURE(LogFormat_ == COMPRESSED_LOG_FORMAT_V4 || LogFormat_ == COMPRESSED_LOG_FORMAT_V5);
+
+ TBuffer& b1 = buffer;
+
+ // V4: rough estimate (input size + 256); V5: the bound reported by the zstd codec.
+ size_t maxCompressedLength = (LogFormat_ == COMPRESSED_LOG_FORMAT_V4) ? b1.Size() + 256 : ZstdCodec_->MaxCompressedLength(b1);
+
+ // Reserve enough memory to minimize reallocs
+ TBufferOutput outbuf(sizeof(TFrameHeaderData) + maxCompressedLength);
+ TBuffer& b2 = outbuf.Buffer();
+ // Leave room for the header at the front; the payload is appended after it.
+ b2.Proceed(sizeof(TFrameHeaderData));
+
+ {
+ TFrameHeaderData& hdr = *reinterpret_cast<TFrameHeaderData*>(b2.data());
+
+ memcpy(hdr.SyncField, COMPRESSED_LOG_FRAME_SYNC_DATA.data(), COMPRESSED_LOG_FRAME_SYNC_DATA.size());
+ // Top byte: log (frame) format; lower three bytes: content format.
+ hdr.Header.Format = (LogFormat_ << 24) | (ContentFormat_ & 0xffffff);
+ hdr.Header.FrameId = GenerateFrameId();
+ hdr.HeaderEx.UncompressedDatalen = (ui32)b1.Size();
+ hdr.HeaderEx.StartTimestamp = startTimestamp;
+ hdr.HeaderEx.EndTimestamp = endTimestamp;
+ // Placeholder; the real checksum is filled in after compression below.
+ hdr.HeaderEx.PayloadChecksum = 0;
+ hdr.HeaderEx.CompressorVersion = 0;
+ }
+
+ if (LogFormat_ == COMPRESSED_LOG_FORMAT_V4) {
+ // V4 payload: lz4hc-encode first, then pass the result through zlib into outbuf.
+ TBuffer encoded(b1.Size() + sizeof(TFrameHeaderData) + 256);
+ Lz4hcCodec_->Encode(b1, encoded);
+
+ TZLibCompress compr(&outbuf, ZLib::ZLib, 6, 2048);
+ compr.Write(encoded.data(), encoded.size());
+ compr.Finish();
+ } else {
+ // V5 payload: zstd-compress straight into the space reserved after the header.
+ b2.Advance(ZstdCodec_->Compress(b1, b2.Pos()));
+ }
+
+ {
+ // The header reference is obtained again here rather than reused from above
+ // (presumably because appending the payload may have reallocated b2).
+ const size_t k = sizeof(TCompressedFrameBaseHeader) + COMPRESSED_LOG_FRAME_SYNC_DATA.size();
+ TFrameHeaderData& hdr = *reinterpret_cast<TFrameHeaderData*>(b2.data());
+ // Length counts everything after the sync field and the base header.
+ hdr.Header.Length = static_cast<ui32>(b2.size() - k);
+ // Payload checksum covers all bytes that follow the full header.
+ hdr.HeaderEx.PayloadChecksum = MurmurHash<ui32>(b2.data() + sizeof(TFrameHeaderData), b2.size() - sizeof(TFrameHeaderData));
+
+ // Header checksum covers the header minus the sync field and minus the
+ // HeaderChecksum field itself, starting right after the sync field.
+ const size_t n = sizeof(TFrameHeaderData) - (COMPRESSED_LOG_FRAME_SYNC_DATA.size() + sizeof(hdr.HeaderEx.HeaderChecksum));
+ hdr.HeaderEx.HeaderChecksum = MurmurHash<ui32>(b2.data() + COMPRESSED_LOG_FRAME_SYNC_DATA.size(), n);
+ }
+
+ const TBuffer& frameData = outbuf.Buffer();
+
+ // Armed from this point on: an exception thrown by anything below triggers
+ // ErrorCallback_->OnWriteError() while the stack unwinds.
+ TOnExceptionAction actionCallback([this] {
+ if (ErrorCallback_) {
+ ErrorCallback_->OnWriteError();
+ }
+ });
+
+ if (writeFrameCallback) {
+ writeFrameCallback->OnAfterCompress(frameData, startTimestamp, endTimestamp);
+ }
+
+ Log_.Write(frameData.Data(), frameData.Size(), std::move(metaFlags));
+ if (SuccessCallback_) {
+ SuccessCallback_->OnWriteSuccess(frameData);
+ }
+}
+
+// Allocates a new TProtobufEvent of class `c` bound to this factory's
+// NProtoBuf::TEventFactory. The event is returned as a raw pointer created
+// with `new`.
+TEvent* TProtobufEventFactory::CreateLogEvent(TEventClass c) {
+ return new TProtobufEvent(c, EventFactory_);
+}
+
+// Resolves an event name to its class id via the underlying factory's IdByName().
+TEventClass TProtobufEventFactory::ClassByName(TStringBuf name) const {
+ return EventFactory_->IdByName(name);
+}
+
+// Returns the smallest registered event id, or 0 when nothing is registered.
+TEventClass TProtobufEventFactory::EventClassBegin() const {
+    const auto& registered = EventFactory_->FactoryItems();
+
+    return registered.empty()
+        ? static_cast<TEventClass>(0)
+        : static_cast<TEventClass>(registered.begin()->first);
+}
+
+// Returns one past the largest registered event id, or 0 when nothing is
+// registered.
+TEventClass TProtobufEventFactory::EventClassEnd() const {
+    const auto& registered = EventFactory_->FactoryItems();
+
+    return registered.empty()
+        ? static_cast<TEventClass>(0)
+        : static_cast<TEventClass>(registered.rbegin()->first + 1);
+}
+
+// Accessors for the shared protobuf-based factory and processor instances.
+namespace NEvClass {
+ // Returns the Singleton<> instance of TProtobufEventFactory.
+ IEventFactory* Factory() {
+ return Singleton<TProtobufEventFactory>();
+ }
+
+ // Returns the Singleton<> instance of TProtobufEventProcessor.
+ IEventProcessor* Processor() {
+ return Singleton<TProtobufEventProcessor>();
+ }
+}
+
+// Returns the message obtained from UnknownEventMessage().
+const NProtoBuf::Message* TUnknownEvent::GetProto() const {
+ return UnknownEventMessage();
+}
+
+// Fixed display name used when printing unknown events.
+TStringBuf TUnknownEvent::GetName() const {
+ return TStringBuf("UnknownEvent");
+}
+
+// Emits an "EventBody" map holding the event name under "Type" and the numeric
+// class id under "EventId".
+void TUnknownEvent::DoPrintJson(NJson::TJsonWriter& jsonWriter) const {
+    const size_t eventId = static_cast<size_t>(Class);
+
+    jsonWriter.OpenMap("EventBody");
+    jsonWriter.Write("Type", GetName());
+    jsonWriter.Write("EventId", eventId);
+    jsonWriter.CloseMap();
+}
+
+// Fixed display name used when printing end-of-frame markers.
+TStringBuf TEndOfFrameEvent::GetName() const {
+ return TStringBuf("EndOfFrame");
+}
+
+// Returns the Singleton<> instance of the NEventLogInternal::TEndOfFrameEvent message.
+const NProtoBuf::Message* TEndOfFrameEvent::GetProto() const {
+ return Singleton<NEventLogInternal::TEndOfFrameEvent>();
+}
+
+// Emits an "EventBody" map holding the event name under "Type" and an empty
+// "Fields" map.
+void TEndOfFrameEvent::DoPrintJson(NJson::TJsonWriter& jsonWriter) const {
+ jsonWriter.OpenMap("EventBody");
+ jsonWriter.Write("Type", GetName());
+ jsonWriter.OpenMap("Fields");
+ jsonWriter.CloseMap();
+ jsonWriter.CloseMap();
+}
+
+// Builds a TProtobufEvent from a timestamp, an event id and a protobuf message,
+// and returns it as an owning THolder<TEvent>.
+THolder<TEvent> MakeProtobufLogEvent(TEventTimestamp ts, TEventClass eventId, google::protobuf::Message& ev) {
+ return MakeHolder<TProtobufEvent>(ts, eventId, ev);
+}
diff --git a/library/cpp/eventlog/eventlog.h b/library/cpp/eventlog/eventlog.h
new file mode 100644
index 0000000000..45c2dfb17f
--- /dev/null
+++ b/library/cpp/eventlog/eventlog.h
@@ -0,0 +1,623 @@
+#pragma once
+
+#include "eventlog_int.h"
+#include "event_field_output.h"
+#include "events_extension.h"
+
+#include <library/cpp/blockcodecs/codecs.h>
+#include <library/cpp/logger/all.h>
+
+#include <google/protobuf/message.h>
+
+#include <util/datetime/base.h>
+#include <util/generic/ptr.h>
+#include <util/generic/string.h>
+#include <util/stream/output.h>
+#include <util/stream/buffer.h>
+#include <util/stream/str.h>
+#include <util/system/mutex.h>
+#include <util/stream/output.h>
+#include <util/system/env.h>
+#include <util/system/unaligned_mem.h>
+#include <util/ysaveload.h>
+
+#include <cstdlib>
+
+namespace NJson {
+ class TJsonWriter;
+}
+
+class IEventLog;
+
+// Base class of every event stored in an event log. Holds the fields common to
+// all events (class id, timestamp, frame id) and declares the serialization and
+// printing hooks that concrete events implement.
+class TEvent : public TThrRefBase {
+public:
+ // Textual representation selected when an event is printed.
+ enum class TOutputFormat {
+ TabSeparated,
+ TabSeparatedRaw, // disables escaping
+ Json
+ };
+
+ // Printing options. Implicitly constructible from a bare TOutputFormat.
+ struct TOutputOptions {
+ TOutputFormat OutputFormat = TOutputFormat::TabSeparated;
+ // Dump some fields (e.g. timestamp) in more human-readable format
+ bool HumanReadable = false;
+
+ TOutputOptions(TOutputFormat outputFormat = TOutputFormat::TabSeparated)
+ : OutputFormat(outputFormat)
+ {
+ }
+
+ TOutputOptions(TOutputFormat outputFormat, bool humanReadable)
+ : OutputFormat(outputFormat)
+ , HumanReadable(humanReadable)
+ {
+ }
+ };
+
+ // Timing context handed to Print(): start time of the frame the event
+ // belongs to and timestamp of the previously printed event.
+ struct TEventState {
+ TEventTimestamp FrameStartTime = 0;
+ TEventTimestamp PrevEventTime = 0;
+ TEventState() {
+ }
+ };
+
+ // c - event class id; t - event timestamp. FrameId starts at 0.
+ TEvent(TEventClass c, TEventTimestamp t)
+ : Class(c)
+ , Timestamp(t)
+ {
+ }
+
+ virtual ~TEvent() = default;
+
+ // Note, that descendants MUST have Save() & Load() methods to alter
+ // only its new variables, not the base class!
+ virtual void Save(IOutputStream& out) const = 0;
+ // Buffer-specific variant of Save(); by default simply calls Save().
+ virtual void SaveToBuffer(TBufferOutput& out) const {
+ Save(out);
+ }
+
+ // Note, that descendants MUST have Save() & Load() methods to alter
+ // only its new variables, not the base class!
+ virtual void Load(IInputStream& i) = 0;
+
+ virtual TStringBuf GetName() const = 0;
+ virtual const NProtoBuf::Message* GetProto() const = 0;
+
+ void Print(IOutputStream& out, const TOutputOptions& options = TOutputOptions(), const TEventState& eventState = TEventState()) const;
+ void PrintHeader(IOutputStream& out, const TOutputOptions& options, const TEventState& eventState) const;
+
+ // Prints the event with default options into a string.
+ TString ToString() const {
+ TStringStream buff;
+ Print(buff);
+ return buff.Str();
+ }
+
+ // Writes the common header (timestamp, class) followed by the payload
+ // produced by SaveToBuffer().
+ void FullSaveToBuffer(TBufferOutput& buf) const {
+ SaveMessageHeader(buf);
+ this->SaveToBuffer(buf);
+ }
+
+ // Writes the common header (timestamp, class) followed by the payload
+ // produced by Save().
+ void FullSave(IOutputStream& o) const {
+ SaveMessageHeader(o);
+ this->Save(o);
+ }
+
+ // Reads the common header in the order it was written (timestamp, class),
+ // then the payload via Load().
+ void FullLoad(IInputStream& i) {
+ ::Load(&i, Timestamp);
+ ::Load(&i, Class);
+ this->Load(i);
+ }
+
+ // Returns GetProto() cast to T. The cast is an unchecked static_cast, so the
+ // caller must already know the concrete message type.
+ template <class T>
+ const T* Get() const {
+ return static_cast<const T*>(this->GetProto());
+ }
+
+ TEventClass Class;
+ TEventTimestamp Timestamp;
+ ui32 FrameId = 0;
+
+private:
+ // Serializes the fields shared by all events: Timestamp first, then Class.
+ void SaveMessageHeader(IOutputStream& out) const {
+ ::Save(&out, Timestamp);
+ ::Save(&out, Class);
+ }
+
+ // Format-specific body printers implemented by concrete events.
+ virtual void DoPrint(IOutputStream& out, EFieldOutputFlags flags) const = 0;
+ virtual void DoPrintJson(NJson::TJsonWriter& jsonWriter) const = 0;
+
+ void PrintJsonHeader(NJson::TJsonWriter& jsonWriter) const;
+};
+
+using TEventPtr = TIntrusivePtr<TEvent>;
+using TConstEventPtr = TIntrusiveConstPtr<TEvent>;
+
+// Interface of a consumer that is fed events one at a time.
+class IEventProcessor {
+public:
+ // Stores the output options in Options_ for use by the implementation.
+ virtual void SetOptions(const TEvent::TOutputOptions& options) {
+ Options_ = options;
+ }
+ virtual void ProcessEvent(const TEvent* ev) = 0;
+ // Variant of ProcessEvent() that reports a status; the default
+ // implementation processes the event and always returns true.
+ virtual bool CheckedProcessEvent(const TEvent* ev) {
+ ProcessEvent(ev);
+ return true;
+ }
+ virtual ~IEventProcessor() = default;
+
+protected:
+ TEvent::TOutputOptions Options_;
+};
+
+// Interface of a factory that creates event objects by class id and exposes the
+// range of known class ids.
+class IEventFactory {
+public:
+ // Creates an event of class `c`; the result is a raw pointer.
+ virtual TEvent* CreateLogEvent(TEventClass c) = 0;
+ virtual TEventLogFormat CurrentFormat() = 0;
+ // Maps an event name to its class id.
+ virtual TEventClass ClassByName(TStringBuf name) const = 0;
+ // Bounds of the known class ids.
+ // NOTE(review): presumably a half-open range [Begin, End) - confirm with implementations.
+ virtual TEventClass EventClassBegin() const = 0;
+ virtual TEventClass EventClassEnd() const = 0;
+ virtual ~IEventFactory() = default;
+};
+
+// Placeholder event that carries only a timestamp and a class id. It cannot be
+// serialized or deserialized; both operations throw.
+class TUnknownEvent: public TEvent {
+public:
+    TUnknownEvent(TEventTimestamp ts, TEventClass cls)
+        : TEvent(cls, ts)
+    {
+    }
+
+    ~TUnknownEvent() override = default;
+
+    // Always throws: there is no payload to write.
+    void Save(IOutputStream& out) const override {
+        (void)out;
+        ythrow yexception() << "TUnknownEvent cannot be saved";
+    }
+
+    // Always throws: there is no payload to read.
+    void Load(IInputStream& in) override {
+        (void)in;
+        ythrow yexception() << "TUnknownEvent cannot be loaded";
+    }
+
+    TStringBuf GetName() const override;
+
+private:
+    // Prints "<name>\t<class id>".
+    void DoPrint(IOutputStream& out, EFieldOutputFlags) const override {
+        out << GetName();
+        out << "\t";
+        out << static_cast<size_t>(Class);
+    }
+
+    void DoPrintJson(NJson::TJsonWriter& jsonWriter) const override;
+
+    const NProtoBuf::Message* GetProto() const override;
+};
+
+// Marker event with the fixed class id 0 and no payload. It cannot be
+// serialized or deserialized; both operations throw.
+class TEndOfFrameEvent: public TEvent {
+public:
+    enum {
+        EventClass = 0
+    };
+
+    TEndOfFrameEvent(TEventTimestamp ts)
+        : TEvent(TEndOfFrameEvent::EventClass, ts)
+    {
+    }
+
+    ~TEndOfFrameEvent() override = default;
+
+    // Always throws: there is no payload to write.
+    void Save(IOutputStream& /* o */) const override {
+        ythrow yexception() << "TEndOfFrameEvent cannot be saved";
+    }
+
+    // Always throws: there is no payload to read.
+    void Load(IInputStream& /* i */) override {
+        ythrow yexception() << "TEndOfFrameEvent cannot be loaded";
+    }
+
+    TStringBuf GetName() const override;
+
+private:
+    // Prints just the event name.
+    void DoPrint(IOutputStream& out, EFieldOutputFlags) const override {
+        const TStringBuf name = GetName();
+        out << name;
+    }
+
+    void DoPrintJson(NJson::TJsonWriter& jsonWriter) const override;
+
+    const NProtoBuf::Message* GetProto() const override;
+};
+
+// Callback interface used by TEventLogFrame::VisitEvents(): Visit() is invoked
+// once for each event decoded from the frame.
+class ILogFrameEventVisitor {
+public:
+ virtual ~ILogFrameEventVisitor() = default;
+
+ virtual void Visit(const TEvent& event) = 0;
+};
+
+// Ref-counted hook invoked by IEventLog::WriteFrame() implementations.
+class IWriteFrameCallback : public TAtomicRefCount<IWriteFrameCallback> {
+public:
+ virtual ~IWriteFrameCallback() = default;
+
+ // Receives the complete frame (header plus compressed payload) together with
+ // the frame's timestamp range.
+ virtual void OnAfterCompress(const TBuffer& compressedFrame, TEventTimestamp startTimestamp, TEventTimestamp endTimestamp) = 0;
+};
+
+using TWriteFrameCallbackPtr = TIntrusivePtr<IWriteFrameCallback>;
+
+// In-memory accumulator for a group of events ("frame") that is later written
+// to an IEventLog as a unit.
+//
+// Events are serialized into Buf_ as they are logged. When NeedAlwaysSafeAdd_
+// is set, LogEvent()/AddMetaFlag()/VisitEvents() serialize access through Mtx_;
+// otherwise the caller is responsible for synchronization.
+class TEventLogFrame {
+public:
+ TEventLogFrame(bool needAlwaysSafeAdd = false, TWriteFrameCallbackPtr writeFrameCallback = nullptr);
+ TEventLogFrame(IEventLog& parentLog, bool needAlwaysSafeAdd = false, TWriteFrameCallbackPtr writeFrameCallback = nullptr);
+ TEventLogFrame(IEventLog* parentLog, bool needAlwaysSafeAdd = false, TWriteFrameCallbackPtr writeFrameCallback = nullptr);
+
+ virtual ~TEventLogFrame() = default;
+
+ void Flush();
+ void SafeFlush();
+
+ // Makes the frame accept events even when no log is attached
+ // (see IsEventIgnored()).
+ void ForceDump() {
+ ForceDump_ = true;
+ }
+
+ // Logs `ev`, taking Mtx_ only if NeedAlwaysSafeAdd_ is set.
+ template <class T>
+ inline void LogEvent(const T& ev) {
+ if (NeedAlwaysSafeAdd_) {
+ SafeLogEvent(ev);
+ } else {
+ UnSafeLogEvent(ev);
+ }
+ }
+
+ // Same as above with an explicit timestamp.
+ template <class T>
+ inline void LogEvent(TEventTimestamp timestamp, const T& ev) {
+ if (NeedAlwaysSafeAdd_) {
+ SafeLogEvent(timestamp, ev);
+ } else {
+ UnSafeLogEvent(timestamp, ev);
+ }
+ }
+
+ // Logs `ev` without locking. T must expose an `ID` member.
+ template <class T>
+ inline void UnSafeLogEvent(const T& ev) {
+ if (!IsEventIgnored(ev.ID))
+ LogProtobufEvent(ev.ID, ev);
+ }
+
+ template <class T>
+ inline void UnSafeLogEvent(TEventTimestamp timestamp, const T& ev) {
+ if (!IsEventIgnored(ev.ID))
+ LogProtobufEvent(timestamp, ev.ID, ev);
+ }
+
+ // Logs `ev` under Mtx_. The ignore check itself is done before the lock is taken.
+ template <class T>
+ inline void SafeLogEvent(const T& ev) {
+ if (!IsEventIgnored(ev.ID)) {
+ TGuard<TMutex> g(Mtx_);
+ LogProtobufEvent(ev.ID, ev);
+ }
+ }
+
+ template <class T>
+ inline void SafeLogEvent(TEventTimestamp timestamp, const T& ev) {
+ if (!IsEventIgnored(ev.ID)) {
+ TGuard<TMutex> g(Mtx_);
+ LogProtobufEvent(timestamp, ev.ID, ev);
+ }
+ }
+
+ void VisitEvents(ILogFrameEventVisitor& visitor, IEventFactory* eventFactory);
+
+ // An event is dropped only when all three hold: debug mode is off, no log is
+ // attached, and ForceDump() has not been requested.
+ inline bool IsEventIgnored(size_t eventId) const {
+ Y_UNUSED(eventId); // in future we might want to selectively discard only some kinds of messages
+ return !IsDebugModeEnabled() && EvLog_ == nullptr && !ForceDump_;
+ }
+
+ // Attaches the frame to `evLog`. Only the address is stored, so the log must
+ // outlive its use by this frame.
+ void Enable(IEventLog& evLog) {
+ EvLog_ = &evLog;
+ }
+
+ // Detaches the frame from its log.
+ void Disable() {
+ EvLog_ = nullptr;
+ }
+
+ void SetNeedAlwaysSafeAdd(bool val) {
+ NeedAlwaysSafeAdd_ = val;
+ }
+
+ void SetWriteFrameCallback(TWriteFrameCallbackPtr writeFrameCallback) {
+ WriteFrameCallback_ = writeFrameCallback;
+ }
+
+ // Appends a (key, value) pair to MetaFlags_, under Mtx_ if NeedAlwaysSafeAdd_ is set.
+ void AddMetaFlag(const TString& key, const TString& value) {
+ if (NeedAlwaysSafeAdd_) {
+ TGuard<TMutex> g(Mtx_);
+ MetaFlags_.emplace_back(key, value);
+ } else {
+ MetaFlags_.emplace_back(key, value);
+ }
+ }
+
+protected:
+ void LogProtobufEvent(size_t eventId, const NProtoBuf::Message& ev);
+ void LogProtobufEvent(TEventTimestamp timestamp, size_t eventId, const NProtoBuf::Message& ev);
+
+private:
+ // True when the environment variable EVLOG_DEBUG equals "1". The variable is
+ // read once, when the function-local static is initialized.
+ static bool IsDebugModeEnabled() {
+ static struct TSelector {
+ bool Flag;
+
+ TSelector()
+ : Flag(GetEnv("EVLOG_DEBUG") == TStringBuf("1"))
+ {
+ }
+ } selector;
+
+ return selector.Flag;
+ }
+
+ template <class T>
+ void DebugDump(const T& ev);
+
+ // T must be a descendant of NEvClass::TEvent
+ template <class T>
+ inline void LogEventImpl(const T& ev) {
+ if (EvLog_ != nullptr || ForceDump_) {
+ TBuffer& b = Buf_.Buffer();
+ size_t lastSize = b.size();
+ // Write a 4-byte placeholder, serialize the event after it, then patch
+ // the placeholder with the record size (placeholder included).
+ ::Save(&Buf_, ui32(0));
+ ev.FullSaveToBuffer(Buf_);
+ WriteUnaligned<ui32>(b.data() + lastSize, (ui32)(b.size() - lastSize));
+ AddEvent(ev.Timestamp);
+ }
+
+ if (IsDebugModeEnabled()) {
+ DebugDump(ev);
+ }
+ }
+
+ void AddEvent(TEventTimestamp timestamp);
+ void DoInit();
+
+private:
+ TBufferOutput Buf_; // serialized events of the current frame
+ TEventTimestamp StartTimestamp_, EndTimestamp_; // timestamp range of the buffered events
+ IEventLog* EvLog_; // attached log, not owned; may be nullptr
+ TMutex Mtx_; // guards the "safe" code paths
+ bool NeedAlwaysSafeAdd_;
+ bool ForceDump_;
+ TWriteFrameCallbackPtr WriteFrameCallback_;
+ TLogRecord::TMetaFlags MetaFlags_;
+ friend class TEventRecord;
+};
+
+// Ref-counted TEventLogFrame with a user-provided destructor; the constructors
+// mirror those of TEventLogFrame.
+class TSelfFlushLogFrame: public TEventLogFrame, public TAtomicRefCount<TSelfFlushLogFrame> {
+public:
+ TSelfFlushLogFrame(bool needAlwaysSafeAdd = false, TWriteFrameCallbackPtr writeFrameCallback = nullptr);
+ TSelfFlushLogFrame(IEventLog& parentLog, bool needAlwaysSafeAdd = false, TWriteFrameCallbackPtr writeFrameCallback = nullptr);
+ TSelfFlushLogFrame(IEventLog* parentLog, bool needAlwaysSafeAdd = false, TWriteFrameCallbackPtr writeFrameCallback = nullptr);
+
+ virtual ~TSelfFlushLogFrame();
+};
+
+using TSelfFlushLogFramePtr = TIntrusivePtr<TSelfFlushLogFrame>;
+
+// Ref-counted interface of an event log: a sink that accepts whole frames.
+class IEventLog: public TAtomicRefCount<IEventLog> {
+public:
+ // Notification interface for failed frame writes.
+ class IErrorCallback {
+ public:
+ virtual ~IErrorCallback() {
+ }
+
+ virtual void OnWriteError() = 0;
+ };
+
+ // Notification interface for successful frame writes; receives the frame
+ // that was written.
+ class ISuccessCallback {
+ public:
+ virtual ~ISuccessCallback() {
+ }
+
+ virtual void OnWriteSuccess(const TBuffer& frameData) = 0;
+ };
+
+ virtual ~IEventLog();
+
+ virtual void ReopenLog() = 0;
+ virtual void CloseLog() = 0;
+ virtual void Flush() = 0;
+ // Callback setters; the default implementations ignore the argument.
+ virtual void SetErrorCallback(IErrorCallback*) {
+ }
+ virtual void SetSuccessCallback(ISuccessCallback*) {
+ }
+
+ // Logs a single event as a frame of its own: a temporary frame bound to this
+ // log is created, filled with `ev` and flushed immediately.
+ template <class T>
+ void LogEvent(const T& ev) {
+ TEventLogFrame frame(*this);
+ frame.LogEvent(ev);
+ frame.Flush();
+ }
+
+ virtual bool HasNullBackend() const = 0;
+
+ // Writes one frame built from `buffer`, which holds the serialized events.
+ virtual void WriteFrame(TBuffer& buffer,
+ TEventTimestamp startTimestamp,
+ TEventTimestamp endTimestamp,
+ TWriteFrameCallbackPtr writeFrameCallback = nullptr,
+ TLogRecord::TMetaFlags metaFlags = {}) = 0;
+};
+
+// Selects and configures the file backend used by the file-name based
+// TEventLog constructors.
+struct TEventLogBackendOptions {
+ // true: TSyncPageCacheFileLogBackend; false: plain TFileLogBackend.
+ bool UseSyncPageCacheBackend = false;
+ // Passed to the TSyncPageCacheFileLogBackend constructor; unused otherwise.
+ size_t SyncPageCacheBackendBufferSize = 0;
+ size_t SyncPageCacheBackendMaxPendingSize = 0;
+};
+
+// IEventLog implementation that compresses frames and writes them to a TLog.
+class TEventLog: public IEventLog {
+public:
+ /*
+ * The contentFormat parameter identifies the format of the log content, e.g. which
+ * event classes may occur in the log, which parameters those events have, etc.
+ * The most significant byte of the parameter must be zero.
+ */
+ TEventLog(const TString& fileName, TEventLogFormat contentFormat, const TEventLogBackendOptions& backendOpts, TMaybe<TEventLogFormat> logFormat);
+ TEventLog(const TString& fileName, TEventLogFormat contentFormat, const TEventLogBackendOptions& backendOpts = {});
+ TEventLog(const TLog& log, TEventLogFormat contentFormat, TEventLogFormat logFormat = COMPRESSED_LOG_FORMAT_V4);
+ TEventLog(TEventLogFormat contentFormat, TEventLogFormat logFormat = COMPRESSED_LOG_FORMAT_V4);
+
+ ~TEventLog() override;
+
+ void ReopenLog() override;
+ void CloseLog() override;
+ void Flush() override;
+ // The callbacks are stored as raw pointers and not owned; they must outlive
+ // their use by this log. Passing nullptr disables the notification.
+ void SetErrorCallback(IErrorCallback* errorCallback) override {
+ ErrorCallback_ = errorCallback;
+ }
+ void SetSuccessCallback(ISuccessCallback* successCallback) override {
+ SuccessCallback_ = successCallback;
+ }
+
+ // Logs a single event as a frame of its own (same body as IEventLog::LogEvent).
+ template <class T>
+ void LogEvent(const T& ev) {
+ TEventLogFrame frame(*this);
+ frame.LogEvent(ev);
+ frame.Flush();
+ }
+
+ bool HasNullBackend() const override {
+ return HasNullBackend_;
+ }
+
+ void WriteFrame(TBuffer& buffer,
+ TEventTimestamp startTimestamp,
+ TEventTimestamp endTimestamp,
+ TWriteFrameCallbackPtr writeFrameCallback = nullptr,
+ TLogRecord::TMetaFlags metaFlags = {}) override;
+
+private:
+ mutable TLog Log_;
+ TEventLogFormat ContentFormat_; // lower three bytes of the frame header's Format field
+ const TEventLogFormat LogFormat_; // frame format; WriteFrame() supports V4 and V5
+ bool HasNullBackend_;
+ const NBlockCodecs::ICodec* const Lz4hcCodec_; // used for V4 frames
+ const NBlockCodecs::ICodec* const ZstdCodec_; // used for V5 frames
+ IErrorCallback* ErrorCallback_ = nullptr;
+ ISuccessCallback* SuccessCallback_ = nullptr;
+};
+
+using TEventLogPtr = TIntrusivePtr<IEventLog>;
+
+// IEventLog decorator that forwards every operation to another ("slave") log.
+//
+// The slave is either borrowed (reference constructor: the caller keeps it
+// alive) or shared (TEventLogPtr constructor: this object holds a reference).
+// The destructor flushes the slave and swallows any exception from that flush.
+class TEventLogWithSlave: public IEventLog {
+public:
+    // Borrows `parentLog`; no ownership is taken.
+    TEventLogWithSlave(IEventLog& parentLog)
+        : Slave_(&parentLog)
+    {
+    }
+
+    // Shares ownership of `parentLog` for the lifetime of this object.
+    TEventLogWithSlave(const TEventLogPtr& parentLog)
+        : SlavePtr_(parentLog)
+        , Slave_(SlavePtr_.Get())
+    {
+    }
+
+    ~TEventLogWithSlave() override {
+        try {
+            Slave().Flush();
+        } catch (...) {
+            // Best effort only: a destructor must not throw.
+        }
+    }
+
+    void Flush() override {
+        Slave().Flush();
+    }
+
+    void ReopenLog() override {
+        return Slave().ReopenLog();
+    }
+    void CloseLog() override {
+        return Slave().CloseLog();
+    }
+
+    bool HasNullBackend() const override {
+        return Slave().HasNullBackend();
+    }
+
+    void WriteFrame(TBuffer& buffer,
+                    TEventTimestamp startTimestamp,
+                    TEventTimestamp endTimestamp,
+                    TWriteFrameCallbackPtr writeFrameCallback = nullptr,
+                    TLogRecord::TMetaFlags metaFlags = {}) override {
+        // Both by-value arguments are moved on: copying the intrusive pointer
+        // would cost an extra atomic ref-count increment and decrement per frame.
+        Slave().WriteFrame(buffer, startTimestamp, endTimestamp, std::move(writeFrameCallback), std::move(metaFlags));
+    }
+
+    void SetErrorCallback(IErrorCallback* errorCallback) override {
+        Slave().SetErrorCallback(errorCallback);
+    }
+
+    void SetSuccessCallback(ISuccessCallback* successCallback) override {
+        Slave().SetSuccessCallback(successCallback);
+    }
+
+protected:
+    // The log all calls are forwarded to.
+    inline IEventLog& Slave() const {
+        return *Slave_;
+    }
+
+private:
+    TEventLogPtr SlavePtr_;      // keeps the slave alive when constructed from a TEventLogPtr
+    IEventLog* Slave_ = nullptr; // always points at the slave, owned or not
+};
+
+extern TAtomic eventlogFrameCounter;
+
+// Event processor that prints every event to a stream while tracking, per
+// frame, the frame start time and the timestamp of the previous event.
+class TProtobufEventProcessor: public IEventProcessor {
+public:
+    // Prints `ev` to Cout.
+    void ProcessEvent(const TEvent* ev) override final {
+        ProcessEvent(ev, &Cout);
+    }
+
+    // Prints `ev` to `out` and then records its timestamp as the previous event time.
+    void ProcessEvent(const TEvent* ev, IOutputStream* out) {
+        UpdateEventState(ev);
+        DoProcessEvent(ev, out);
+        EventState_.PrevEventTime = ev->Timestamp;
+    }
+
+protected:
+    // Default output: the printed event followed by Endl.
+    virtual void DoProcessEvent(const TEvent* ev, IOutputStream* out) {
+        IOutputStream& sink = *out;
+        ev->Print(sink, Options_, EventState_);
+        sink << Endl;
+    }
+
+    ui32 CurrentFrameId_ = Max<ui32>();
+    TEvent::TEventState EventState_;
+
+private:
+    // Restarts the timing state whenever an event from a different frame arrives.
+    void UpdateEventState(const TEvent* ev) {
+        if (ev->FrameId == CurrentFrameId_) {
+            return;
+        }
+
+        CurrentFrameId_ = ev->FrameId;
+        EventState_.FrameStartTime = ev->Timestamp;
+        EventState_.PrevEventTime = ev->Timestamp;
+    }
+};
+
+// IEventFactory implementation backed by an NProtoBuf::TEventFactory registry.
+class TProtobufEventFactory: public IEventFactory {
+public:
+ // factory - registry to use; defaults to the process-wide instance. Only the
+ // pointer is stored.
+ TProtobufEventFactory(NProtoBuf::TEventFactory* factory = NProtoBuf::TEventFactory::Instance())
+ : EventFactory_(factory)
+ {
+ }
+
+ TEvent* CreateLogEvent(TEventClass c) override;
+
+ // This factory always reports content format 0.
+ TEventLogFormat CurrentFormat() override {
+ return 0;
+ }
+
+ TEventClass ClassByName(TStringBuf name) const override;
+
+ TEventClass EventClassBegin() const override;
+
+ TEventClass EventClassEnd() const override;
+
+ ~TProtobufEventFactory() override = default;
+
+private:
+ NProtoBuf::TEventFactory* EventFactory_;
+};
+
+THolder<TEvent> MakeProtobufLogEvent(TEventTimestamp ts, TEventClass eventId, google::protobuf::Message& ev);
+
+namespace NEvClass {
+ IEventFactory* Factory();
+ IEventProcessor* Processor();
+}
diff --git a/library/cpp/eventlog/eventlog_int.cpp b/library/cpp/eventlog/eventlog_int.cpp
new file mode 100644
index 0000000000..faa8c42cbe
--- /dev/null
+++ b/library/cpp/eventlog/eventlog_int.cpp
@@ -0,0 +1,12 @@
+#include "eventlog_int.h"
+
+#include <util/string/cast.h>
+
+// Parses the textual name of a log format. Returns the numeric format code on
+// success and an empty TMaybe when `str` is not recognised by TryFromString.
+TMaybe<TEventLogFormat> ParseEventLogFormat(TStringBuf str) {
+    EEventLogFormat parsed;
+    if (!TryFromString(str, parsed)) {
+        return {};
+    }
+
+    return static_cast<TEventLogFormat>(parsed);
+}
diff --git a/library/cpp/eventlog/eventlog_int.h b/library/cpp/eventlog/eventlog_int.h
new file mode 100644
index 0000000000..eb00fecfab
--- /dev/null
+++ b/library/cpp/eventlog/eventlog_int.h
@@ -0,0 +1,72 @@
+#pragma once
+
+#include <util/stream/output.h>
+#include <util/generic/maybe.h>
+#include <util/generic/utility.h>
+#include <util/generic/yexception.h>
+#include <util/ysaveload.h>
+
+using TEventClass = ui32;
+using TEventLogFormat = ui32;
+using TEventTimestamp = ui64;
+
+// Fixed 64-byte marker that starts every frame: TEventLog::WriteFrame() copies
+// it into the SyncField of the frame header. The `sv` literal keeps the
+// embedded NUL bytes as part of the data.
+constexpr TStringBuf COMPRESSED_LOG_FRAME_SYNC_DATA =
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\xfe\x00\x00\xff\xff\x00\x00\xff\xff\x00"
+ "\x00\xff\xff\x00\x00\xff\xff\x00\x00\xff\xff\x00\x00\xff"
+ "\xff\x00\x00\xff\xff\x00\x00\xff"sv;
+
+static_assert(COMPRESSED_LOG_FRAME_SYNC_DATA.size() == 64);
+
+/*
+ * Log format codes. The log format is the format of the log's service
+ * structures, e.g. the header layout, whether compression is used, etc.
+ * Only the lowest byte is significant.
+ */
+
+enum EEventLogFormat : TEventLogFormat {
+ // Format version 1. Uses the LZQ compressor.
+ COMPRESSED_LOG_FORMAT_V1 = 1,
+
+ // Format version 2. Uses the ZLIB compressor. Adds header and payload CRCs
+ // and a compressor type field.
+ COMPRESSED_LOG_FORMAT_V2 = 2,
+
+ // Format version 3. Uses the ZLIB compressor. Inside a frame every event is preceded by its size.
+ COMPRESSED_LOG_FORMAT_V3 = 3,
+
+ // NOTE(review): the quoted names in the trailing comments below are presumably
+ // read by the enum serialization generator (ParseEventLogFormat relies on
+ // TryFromString for this enum) - keep them unchanged; confirm.
+ // Lz4hc codec + zlib
+ COMPRESSED_LOG_FORMAT_V4 = 4 /* "zlib_lz4" */,
+
+ // zstd
+ COMPRESSED_LOG_FORMAT_V5 = 5 /* "zstd" */,
+};
+
+TMaybe<TEventLogFormat> ParseEventLogFormat(TStringBuf str);
+
+#pragma pack(push, 1)
+
+// First part of the frame header (declared inside the pack(1) region).
+struct TCompressedFrameBaseHeader {
+ TEventLogFormat Format; // WriteFrame(): log format in the top byte, content format in the lower three
+ ui32 Length; // Length in bytes of the rest of the frame, following this header
+ ui32 FrameId;
+};
+
+// Extended frame header: timestamp range and payload description.
+struct TCompressedFrameHeader {
+ TEventTimestamp StartTimestamp;
+ TEventTimestamp EndTimestamp;
+ ui32 UncompressedDatalen; // Length of the data that was compressed
+ ui32 PayloadChecksum; // Not used in log format version 1
+};
+
+// TCompressedFrameHeader extended with a compressor version and a checksum of
+// the header itself.
+struct TCompressedFrameHeader2: public TCompressedFrameHeader {
+ ui8 CompressorVersion; // Currently unused
+ ui32 HeaderChecksum;
+};
+
+#pragma pack(pop)
+
+Y_DECLARE_PODTYPE(TCompressedFrameBaseHeader);
+Y_DECLARE_PODTYPE(TCompressedFrameHeader);
+Y_DECLARE_PODTYPE(TCompressedFrameHeader2);
diff --git a/library/cpp/eventlog/events_extension.h b/library/cpp/eventlog/events_extension.h
new file mode 100644
index 0000000000..0cf062f959
--- /dev/null
+++ b/library/cpp/eventlog/events_extension.h
@@ -0,0 +1,161 @@
+#pragma once
+
+#include "event_field_output.h"
+
+#include <google/protobuf/descriptor.h>
+#include <google/protobuf/message.h>
+
+#include <library/cpp/threading/atomic/bool.h>
+#include <library/cpp/string_utils/base64/base64.h>
+
+#include <util/generic/map.h>
+#include <util/generic/deque.h>
+#include <util/generic/singleton.h>
+#include <util/string/hex.h>
+#include <util/system/guard.h>
+#include <util/system/mutex.h>
+
+namespace NProtoBuf {
+ // Registry that maps numeric event ids to protobuf message prototypes and
+ // their print functions.
+ //
+ // Registration is two-phase: ScheduleRegistration() queues registration
+ // functions, and the first lookup runs them all (see DelayedRegistration()).
+ class TEventFactory {
+ public:
+ typedef ::google::protobuf::Message Message;
+ // Prints `event` to `output` according to `flags`.
+ typedef void (*TEventSerializer)(const Message* event, IOutputStream& output, EFieldOutputFlags flags);
+ // Queued registration function; expected to call RegisterEvent().
+ typedef void (*TRegistrationFunc)();
+
+ private:
+ // One registered event type: prototype message plus its print function.
+ class TFactoryItem {
+ public:
+ TFactoryItem(const Message* prototype, const TEventSerializer serializer)
+ : Prototype_(prototype)
+ , Serializer_(serializer)
+ {
+ }
+
+ // Name taken from the prototype's descriptor.
+ TStringBuf GetName() const {
+ return Prototype_->GetDescriptor()->name();
+ }
+
+ // New message of the prototype's type, obtained via Message::New().
+ Message* Create() const {
+ return Prototype_->New();
+ }
+
+ void PrintEvent(const Message* event, IOutputStream& out, EFieldOutputFlags flags) const {
+ (*Serializer_)(event, out, flags);
+ }
+
+ private:
+ const Message* Prototype_;
+ const TEventSerializer Serializer_;
+ };
+
+ typedef TMap<size_t, TFactoryItem> TFactoryMap;
+
+ public:
+ TEventFactory()
+ : FactoryItems_()
+ {
+ }
+
+ // Queues `func` to be run by the next DelayedRegistration().
+ // NOTE(review): no lock is taken here, and once DelayedRegistrationDone_ has
+ // been set the queue is never drained again - functions scheduled after the
+ // first lookup look like they are never run; confirm this is intended.
+ void ScheduleRegistration(TRegistrationFunc func) {
+ EventRegistrators_.push_back(func);
+ }
+
+ // Adds an event type. If `eventId` is already present the existing entry is
+ // kept (map insert does not overwrite).
+ void RegisterEvent(size_t eventId, const Message* prototype, const TEventSerializer serializer) {
+ FactoryItems_.insert(std::make_pair(eventId, TFactoryItem(prototype, serializer)));
+ }
+
+ // Linear search over all registered events by name; throws yexception if no
+ // event is called `eventname`.
+ size_t IdByName(TStringBuf eventname) {
+ DelayedRegistration();
+ for (TFactoryMap::const_iterator it = FactoryItems_.begin(); it != FactoryItems_.end(); ++it) {
+ if (it->second.GetName() == eventname)
+ return it->first;
+ }
+
+ ythrow yexception() << "do not know event '" << eventname << "'";
+ }
+
+ // Name of the event with the given id, or an empty TStringBuf if unknown.
+ TStringBuf NameById(size_t id) {
+ DelayedRegistration();
+ TFactoryMap::const_iterator it = FactoryItems_.find(id);
+ return it != FactoryItems_.end() ? it->second.GetName() : TStringBuf();
+ }
+
+ // New message for the given id (a raw pointer), or nullptr if the id is unknown.
+ Message* CreateEvent(size_t eventId) {
+ DelayedRegistration();
+ TFactoryMap::const_iterator it = FactoryItems_.find(eventId);
+
+ if (it != FactoryItems_.end()) {
+ return it->second.Create();
+ }
+
+ return nullptr;
+ }
+
+ // All registered events, ordered by id.
+ const TMap<size_t, TFactoryItem>& FactoryItems() {
+ DelayedRegistration();
+ return FactoryItems_;
+ }
+
+ // Prints `event` with the serializer registered for `eventId`; does nothing
+ // if the id is unknown.
+ void PrintEvent(
+ size_t eventId,
+ const Message* event,
+ IOutputStream& output,
+ EFieldOutputFlags flags = {}) {
+ DelayedRegistration();
+ TFactoryMap::const_iterator it = FactoryItems_.find(eventId);
+
+ if (it != FactoryItems_.end()) {
+ it->second.PrintEvent(event, output, flags);
+ }
+ }
+
+ static TEventFactory* Instance() {
+ return Singleton<TEventFactory>();
+ }
+
+ private:
+ // Runs every queued registration function once. The flag is tested before
+ // the mutex is taken; a caller that lost the race simply finds the queue
+ // already empty after acquiring the lock.
+ void DelayedRegistration() {
+ if (!DelayedRegistrationDone_) {
+ TGuard<TMutex> guard(MutexEventRegistrators_);
+ Y_UNUSED(guard);
+ while (!EventRegistrators_.empty()) {
+ EventRegistrators_.front()();
+ EventRegistrators_.pop_front();
+ }
+ DelayedRegistrationDone_ = true;
+ }
+ }
+
+ private:
+ TMap<size_t, TFactoryItem> FactoryItems_;
+ TDeque<TRegistrationFunc> EventRegistrators_;
+ NAtomic::TBool DelayedRegistrationDone_ = false;
+ TMutex MutexEventRegistrators_;
+ };
+
+    // Prints the object representation of `obj` as decimal byte values joined
+    // by dots, in memory order.
+    template <typename T>
+    void PrintAsBytes(const T& obj, IOutputStream& output) {
+        const ui8* const bytes = reinterpret_cast<const ui8*>(&obj);
+        const char* separator = "";
+
+        for (size_t i = 0; i != sizeof(T); ++i) {
+            output << separator;
+            output << static_cast<int>(bytes[i]);
+            separator = ".";
+        }
+    }
+
+    // Prints "0x" followed by the hex encoding of the object representation of
+    // `obj` (sizeof(T) bytes, in memory order).
+    template <typename T>
+    void PrintAsHex(const T& obj, IOutputStream& output) {
+        const auto hexDigits = HexEncode(&obj, sizeof(T));
+        output << "0x";
+        output << hexDigits;
+    }
+
+    // Prints `data` base64-encoded; nothing at all is written for empty input.
+    inline void PrintAsBase64(TStringBuf data, IOutputStream& output) {
+        if (data.empty()) {
+            return;
+        }
+
+        output << Base64Encode(data);
+    }
+
+}
diff --git a/library/cpp/eventlog/iterator.cpp b/library/cpp/eventlog/iterator.cpp
new file mode 100644
index 0000000000..71f955bca8
--- /dev/null
+++ b/library/cpp/eventlog/iterator.cpp
@@ -0,0 +1,88 @@
+#include "iterator.h"
+
+#include <library/cpp/streams/growing_file_input/growing_file_input.h>
+
+#include <util/string/cast.h>
+#include <util/string/split.h>
+#include <util/string/type.h>
+#include <util/stream/file.h>
+
+using namespace NEventLog;
+
+namespace {
+    // Builds an event filter from a comma-separated list of event names and/or
+    // numeric class ids. Names are resolved through `fac`. Returns nullptr when
+    // the list contains no entries (empty string or only separators).
+    inline TIntrusivePtr<TEventFilter> ConstructEventFilter(bool enableEvents, const TString& evList, IEventFactory* fac) {
+        TVector<TString> entries;
+
+        if (!evList.empty()) {
+            StringSplitter(evList).Split(',').SkipEmpty().Collect(&entries);
+        }
+
+        if (entries.empty()) {
+            return nullptr;
+        }
+
+        TIntrusivePtr<TEventFilter> result(new TEventFilter(enableEvents));
+
+        for (const auto& entry : entries) {
+            if (!IsNumber(entry)) {
+                // Symbolic name: let the factory translate it to a class id.
+                result->AddEventClass(fac->ClassByName(entry));
+            } else {
+                result->AddEventClass(FromString<size_t>(entry));
+            }
+        }
+
+        return result;
+    }
+
+    // IIterator implementation that chains a frame stream and an event stream
+    // built from TOptions.
+    struct TIterator: public IIterator {
+        // Input selection:
+        //  - no file name: frames are read from o.Input;
+        //  - file name with ForceStreamMode or TailFMode: the file is opened as a
+        //    stream (TGrowingFileInput for TailFMode, TUnbufferedFileInput otherwise);
+        //  - file name otherwise: TFrameStreamer opens the file itself using the
+        //    time range and MaxRequestDuration.
+        inline TIterator(const TOptions& o, IEventFactory* fac)
+            : First(true)
+        {
+            if (o.FileName.empty()) {
+                FrameStream.Reset(new TFrameStreamer(*o.Input, fac, o.FrameFilter));
+            } else if (o.ForceStreamMode || o.TailFMode) {
+                IInputStream* const opened = o.TailFMode
+                    ? static_cast<IInputStream*>(new TGrowingFileInput(o.FileName))
+                    : static_cast<IInputStream*>(new TUnbufferedFileInput(o.FileName));
+                FileInput.Reset(opened);
+                FrameStream.Reset(new TFrameStreamer(*FileInput, fac, o.FrameFilter));
+            } else {
+                FrameStream.Reset(new TFrameStreamer(o.FileName, o.StartTime, o.EndTime, o.MaxRequestDuration, fac, o.FrameFilter));
+            }
+
+            EvFilter = ConstructEventFilter(o.EnableEvents, o.EvList, fac);
+            EventStream.Reset(new TEventStreamer(*FrameStream, o.StartTime, o.EndTime, o.ForceStrongOrdering, EvFilter, o.ForceLosslessStrongOrdering));
+        }
+
+        // Returns the next event, or nullptr once the stream is exhausted. The
+        // very first call only checks availability instead of advancing.
+        TConstEventPtr Next() override {
+            const bool firstCall = First;
+            First = false;
+
+            if (firstCall) {
+                if (!EventStream->Avail()) {
+                    return nullptr;
+                }
+            } else if (!EventStream->Next()) {
+                return nullptr;
+            }
+
+            return **EventStream;
+        }
+
+        THolder<IInputStream> FileInput;
+        THolder<TFrameStreamer> FrameStream;
+        TIntrusivePtr<TEventFilter> EvFilter;
+        THolder<TEventStreamer> EventStream;
+        bool First;
+    };
+}
+
+// Out-of-line definition of the virtual destructor declared in iterator.h.
+IIterator::~IIterator() = default;
+
+// Creates an iterator over the log described by `o`, using `fac` as the event factory.
+THolder<IIterator> NEventLog::CreateIterator(const TOptions& o, IEventFactory* fac) {
+ return MakeHolder<TIterator>(o, fac);
+}
+
+// Same as the two-argument overload, with the factory returned by NEvClass::Factory().
+THolder<IIterator> NEventLog::CreateIterator(const TOptions& o) {
+ return MakeHolder<TIterator>(o, NEvClass::Factory());
+}
diff --git a/library/cpp/eventlog/iterator.h b/library/cpp/eventlog/iterator.h
new file mode 100644
index 0000000000..71a61ed549
--- /dev/null
+++ b/library/cpp/eventlog/iterator.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <util/stream/input.h>
+#include <util/generic/ptr.h>
+#include <util/generic/string.h>
+#include <util/generic/iterator.h>
+
+#include "eventlog.h"
+#include "logparser.h"
+
+namespace NEventLog {
+ // Settings consumed by CreateIterator() to choose the input source and the
+ // time/event filtering applied while iterating an event log.
+ struct TOptions {
+ // Sets the log file to read; returns *this for chaining.
+ inline TOptions& SetFileName(const TString& fileName) {
+ FileName = fileName;
+
+ return *this;
+ }
+
+ // Sets ForceStrongOrdering unless ForceLosslessStrongOrdering is already
+ // set, in which case the call leaves the options unchanged.
+ inline TOptions& SetForceStrongOrdering(bool v) {
+ if(!ForceLosslessStrongOrdering) {
+ ForceStrongOrdering = v;
+ }
+
+ return *this;
+ }
+
+ // Time window passed to the frame and event streamers.
+ ui64 StartTime = MIN_START_TIME;
+ ui64 EndTime = MAX_END_TIME;
+ ui64 MaxRequestDuration = MAX_REQUEST_DURATION;
+ // When non-empty, events are read from this file instead of Input.
+ TString FileName;
+ bool ForceStrongOrdering = false;
+ bool ForceWeakOrdering = false;
+ // EnableEvents and EvList are handed to the event filter construction.
+ bool EnableEvents = true;
+ TString EvList;
+ // Either flag makes a named file be read as a stream.
+ bool ForceStreamMode = false;
+ bool ForceLosslessStrongOrdering = false;
+ bool TailFMode = false;
+ // Stream used when FileName is empty.
+ IInputStream* Input = &Cin;
+ IFrameFilterRef FrameFilter;
+ };
+
+ // Abstract event iterator; TInputRangeAdaptor is parameterized with the
+ // class itself and Next() is the single operation implementations provide.
+ class IIterator: public TInputRangeAdaptor<IIterator> {
+ public:
+ virtual ~IIterator();
+
+ // Returns the next event; the implementation in iterator.cpp returns
+ // nullptr once the underlying stream has no more events.
+ virtual TConstEventPtr Next() = 0;
+ };
+
+ // Factory functions for event iterators. The one-argument overload uses
+ // the factory returned by NEvClass::Factory(); the other takes it from the caller.
+ THolder<IIterator> CreateIterator(const TOptions& o);
+ THolder<IIterator> CreateIterator(const TOptions& o, IEventFactory* fac);
+}
diff --git a/library/cpp/eventlog/logparser.cpp b/library/cpp/eventlog/logparser.cpp
new file mode 100644
index 0000000000..6f8959f788
--- /dev/null
+++ b/library/cpp/eventlog/logparser.cpp
@@ -0,0 +1,814 @@
+#include "logparser.h"
+#include "evdecoder.h"
+
+#include <util/stream/output.h>
+#include <util/stream/zlib.h>
+#include <util/digest/murmur.h>
+#include <util/generic/algorithm.h>
+#include <util/generic/scope.h>
+#include <util/generic/hash_set.h>
+#include <util/string/split.h>
+#include <util/string/cast.h>
+#include <util/string/escape.h>
+#include <util/string/builder.h>
+
+#include <contrib/libs/re2/re2/re2.h>
+
+#include <algorithm>
+#include <array>
+
+namespace {
+ // Consumes bytes from `in` until the last COMPRESSED_LOG_FRAME_SYNC_DATA.size()
+ // bytes read are equal to the sync marker. Returns true when the marker has
+ // just been consumed, false when the input ends first.
+ bool FastforwardUntilSyncHeader(IInputStream* in) {
+ // Usually this function finds the correct header at the first hit
+ std::array<char, COMPRESSED_LOG_FRAME_SYNC_DATA.size()> buffer;
+ if (in->Load(buffer.data(), buffer.size()) != buffer.size()) {
+ return false;
+ }
+
+ // `buffer` is used as a circular window: `begin` points at its oldest byte.
+ auto begin = buffer.begin();
+
+ for (;;) {
+ // The window in logical order is [begin, end) followed by [buffer.begin(), begin);
+ // compare the first part with the head of the marker and the second with its tail.
+ if (std::mismatch(
+ begin, buffer.end(),
+ COMPRESSED_LOG_FRAME_SYNC_DATA.begin()).first == buffer.end() &&
+ std::mismatch(
+ buffer.begin(), begin,
+ COMPRESSED_LOG_FRAME_SYNC_DATA.begin() + (buffer.end() - begin)).first == begin) {
+ return true;
+ }
+ // No match: overwrite the oldest byte with the next input byte.
+ if (!in->ReadChar(*begin)) {
+ return false;
+ }
+ ++begin;
+ if (begin == buffer.end()) {
+ begin = buffer.begin();
+ }
+ }
+ }
+
+ // Verifies the header checksum: the hash covers the bytes starting at
+ // Basehdr and spanning both header structs except the trailing ui32,
+ // i.e. everything but the stored checksum itself.
+ bool HasCorrectChecksum(const TFrameHeader& header) {
+     const size_t hashedBytes = sizeof(TCompressedFrameBaseHeader) + sizeof(TCompressedFrameHeader2) - sizeof(ui32);
+     return MurmurHash<ui32>(&header.Basehdr, hashedBytes) == header.Framehdr.HeaderChecksum;
+ }
+
+ // Scans the stream for the next sync marker followed by a header that
+ // loads successfully. A header that fails to load is logged to Cdbg and
+ // skipped; Nothing() is returned when no further sync marker is found.
+ TMaybe<TFrameHeader> FindNextFrameHeader(IInputStream* in) {
+     while (FastforwardUntilSyncHeader(in)) {
+         try {
+             return TFrameHeader(*in);
+         } catch (const TFrameLoadError& err) {
+             Cdbg << err.what() << Endl;
+             in->Skip(err.SkipAfter);
+         }
+     }
+
+     return Nothing();
+ }
+
+ // In-memory counterpart of FindNextFrameHeader(IInputStream*).
+ // Searches `span` for the sync marker and tries to load the header that
+ // follows it. On success returns the header together with the sub-span
+ // that starts at the sync marker; returns {Nothing(), {}} when no loadable
+ // header is left in the span.
+ std::pair<TMaybe<TFrameHeader>, TStringBuf> FindNextFrameHeader(TStringBuf span) {
+     for (;;) {
+         const auto syncPos = std::search(
+             span.begin(), span.end(),
+             COMPRESSED_LOG_FRAME_SYNC_DATA.begin(), COMPRESSED_LOG_FRAME_SYNC_DATA.end());
+
+         if (syncPos == span.end()) {
+             return {Nothing(), {}};
+         }
+
+         span = span.substr(syncPos - span.begin());
+         try {
+             TMemoryInput in(
+                 span.data() + COMPRESSED_LOG_FRAME_SYNC_DATA.size(),
+                 span.size() - COMPRESSED_LOG_FRAME_SYNC_DATA.size());
+             return {TFrameHeader(in), span};
+         } catch (const TFrameLoadError& err) {
+             Cdbg << err.what() << Endl;
+             // Always move past at least one byte: with SkipAfter == 0 the span
+             // would still start at the same sync marker and the loop would
+             // find the same broken header forever.
+             span = span.substr(std::max<size_t>(err.SkipAfter, 1));
+         }
+     }
+ }
+
+ // Returns a byte offset into `span` from which frame iteration should start
+ // for the time window [start, end] widened by maxRequestDuration on both
+ // sides (clamped so the bounds do not wrap around).
+ // Returns span.size() when no frame header is found or the first frame
+ // already ends after the right bound, and 0 when the first frame starts
+ // after the left bound. Throws via Y_ENSURE when start > end.
+ size_t FindFrames(const TStringBuf span, ui64 start, ui64 end, ui64 maxRequestDuration) {
+ Y_ENSURE(start <= end);
+
+ const auto leftTimeBound = start - Min(start, maxRequestDuration);
+ const auto rightTimeBound = end + Min(maxRequestDuration, Max<ui64>() - end);
+
+ TStringBuf subspan = span;
+ TMaybe<TFrameHeader> maybeLeftFrame;
+ std::tie(maybeLeftFrame, subspan) = FindNextFrameHeader(subspan);
+
+ if (!maybeLeftFrame || maybeLeftFrame->EndTime() > rightTimeBound) {
+ return span.size();
+ }
+
+ if (maybeLeftFrame->StartTime() > leftTimeBound) {
+ return 0;
+ }
+
+ // Bisection: keep `subspan` starting at a frame whose start time is not
+ // later than leftTimeBound and shrink it until it holds a single frame.
+ while (subspan.size() > maybeLeftFrame->FullLength()) {
+ const auto mid = subspan.data() + subspan.size() / 2;
+ auto [midFrame, rightHalfSpan] = FindNextFrameHeader({mid, subspan.data() + subspan.size()});
+ if (!midFrame) {
+ // If mid is in the middle of the last frame, here we will lose it meaning that
+ // we will find previous frame as the result.
+ // This is fine because we will iterate frames starting from that.
+ subspan = subspan.substr(0, subspan.size() / 2);
+ continue;
+ }
+ if (midFrame->StartTime() <= leftTimeBound) {
+ // The frame found in the right half is still early enough: continue from it.
+ maybeLeftFrame = midFrame;
+ subspan = rightHalfSpan;
+ } else {
+ subspan = subspan.substr(0, subspan.size() / 2);
+ }
+ }
+
+ return subspan.data() - span.data();
+ }
+}
+
+// Loads and validates a frame header from `in`.
+// Any failure (load error or failed check) is rethrown as TFrameLoadError whose
+// SkipAfter is FrameLength(); when `in` is a TCountingInput the message also
+// carries the current counter value.
+TFrameHeader::TFrameHeader(IInputStream& in) {
+ try {
+ ::Load(&in, Basehdr);
+
+ Y_ENSURE(Basehdr.Length, "Empty frame additional data");
+
+ ::Load(&in, Framehdr);
+ switch (LogFormat()) {
+ case COMPRESSED_LOG_FORMAT_V1:
+ break;
+
+ // V2..V5 additionally require a zero compressor version and a valid header checksum.
+ case COMPRESSED_LOG_FORMAT_V2:
+ case COMPRESSED_LOG_FORMAT_V3:
+ case COMPRESSED_LOG_FORMAT_V4:
+ case COMPRESSED_LOG_FORMAT_V5:
+ Y_ENSURE(!Framehdr.CompressorVersion, "Wrong compressor");
+
+ Y_ENSURE(HasCorrectChecksum(*this), "Wrong header checksum");
+ break;
+
+ default:
+ ythrow yexception() << "Unsupported log structure format";
+ };
+
+ Y_ENSURE(Framehdr.StartTimestamp <= Framehdr.EndTimestamp, "Wrong start/end timestamps");
+
+ // Each frame must contain at least one event.
+ Y_ENSURE(Framehdr.UncompressedDatalen, "Empty frame payload");
+ } catch (...) {
+ TString location = "";
+ if (const auto* cnt = dynamic_cast<TCountingInput *>(&in)) {
+ location = "@ " + ToString(cnt->Counter());
+ }
+ // NOTE(review): FrameLength() is read from Basehdr even when loading Basehdr
+ // itself failed — confirm the value is meaningful in that case.
+ ythrow TFrameLoadError(FrameLength()) << "Frame Load Error" << location << ": " << CurrentExceptionMessage();
+ }
+}
+
+// Wraps the payload that follows an already-read header: reads from `in` are
+// limited to header.FrameLength() bytes. When `in` is a TCountingInput the
+// frame address is its counter minus sizeof(TFrameHeader); otherwise it is 0.
+TFrame::TFrame(IInputStream& in, TFrameHeader header, IEventFactory* fac)
+    : TFrameHeader(header)
+    , Limiter_(MakeHolder<TLengthLimitedInput>(&in, header.FrameLength()))
+    , Fac_(fac)
+{
+    auto* counting = dynamic_cast<TCountingInput *>(&in);
+    if (!counting) {
+        Address_ = 0;
+        return;
+    }
+
+    Address_ = counting->Counter() - sizeof(TFrameHeader);
+}
+
+// Returns an iterator over the frame's events restricted by `eventFilter`.
+// Events are decoded only while the cache is empty, using the filter of that
+// call; later calls iterate over the already cached events.
+TFrame::TIterator TFrame::GetIterator(TIntrusiveConstPtr<TEventFilter> eventFilter) const {
+    if (EventsCache_.empty()) {
+        TFrameDecoder decoder{*this, eventFilter.Get()};
+        while (decoder.Avail()) {
+            EventsCache_.emplace_back(*decoder);
+            decoder.Next();
+        }
+    }
+
+    return TIterator(*this, eventFilter);
+}
+
+// Drops all cached decoded events (the cache is a mutable member, hence const).
+void TFrame::ClearEventsCache() const {
+ EventsCache_.clear();
+}
+
+// Reads what is left of the frame payload from the limiter and returns it
+// still compressed. Throws if fewer bytes than expected could be read or if
+// the payload hash differs from Framehdr.PayloadChecksum.
+TString TFrame::GetCompressedFrame() const {
+    const auto expectedSize = Limiter_->Left();
+    TString compressed = Limiter_->ReadAll();
+    Y_ENSURE(compressed.size() == expectedSize, "Could not read frame payload: premature end of stream");
+
+    const ui32 actualChecksum = MurmurHash<ui32>(compressed.data(), compressed.size());
+    Y_ENSURE(actualChecksum == Framehdr.PayloadChecksum, "Invalid frame checksum");
+
+    return compressed;
+}
+
+// Returns the frame payload after zlib decompression.
+TString TFrame::GetRawFrame() const {
+    TString compressed = GetCompressedFrame();
+    TStringInput compressedInput(compressed);
+    TZLibDecompress decompressor{&compressedInput};
+    return decompressor.ReadAll();
+}
+
+// Starts iteration over the events cached in `frame`; the cache size is
+// captured at construction and the position is moved to the first event
+// accepted by `filter`.
+TFrame::TIterator::TIterator(const TFrame& frame, TIntrusiveConstPtr<TEventFilter> filter)
+ : Frame_(frame)
+ , Size_(frame.EventsCache_.size())
+ , Filter_(filter)
+ , Index_(0)
+{
+ SkipToValidEvent();
+}
+
+// Returns the cached event at the current position; no bounds check is done here.
+TConstEventPtr TFrame::TIterator::operator*() const {
+ return Frame_.GetEvent(Index_);
+}
+
+// Advances to the next event accepted by the filter.
+// Returns false when the end of the cached events has been reached.
+bool TFrame::TIterator::Next() {
+ Index_++;
+ SkipToValidEvent();
+ return Index_ < Size_;
+}
+
+// Moves Index_ forward to the first event whose class the filter allows
+// (or to Size_ if there is none). Without a filter every event is valid.
+void TFrame::TIterator::SkipToValidEvent() {
+    if (!Filter_) {
+        return;
+    }
+
+    while (Index_ < Size_ && !Filter_->EventAllowed(Frame_.GetEvent(Index_)->Class)) {
+        ++Index_;
+    }
+}
+
+// Locates the next loadable frame header in `in` and wraps the payload that
+// follows it into a TFrame. Returns Nothing() when no header is found.
+TMaybe<TFrame> FindNextFrame(IInputStream* in, IEventFactory* eventFactory) {
+    auto header = FindNextFrameHeader(in);
+    if (!header) {
+        return Nothing();
+    }
+
+    return TFrame{*in, *header, eventFactory};
+}
+
+// Parses the filter specification: match groups are separated by '/', and each
+// group consists of exactly three ':'-separated parts — event type (name or
+// numeric id), field name and the value pattern to match. Separators inside a
+// part can be escaped with a backslash.
+// Throws when a group does not have three parts or when the event type is
+// neither a known event name nor parseable as TEventClass.
+TContainsEventFrameFilter::TContainsEventFrameFilter(const TString& unparsedMatchGroups, const IEventFactory* eventFactory) {
+    TVector<TStringBuf> tokens;
+
+    SplitWithEscaping(tokens, unparsedMatchGroups, "/");
+
+    // Amount of match groups
+    size_t size = tokens.size();
+    MatchGroups.resize(size);
+
+    for (size_t i = 0; i < size; i++) {
+        TMatchGroup& group = MatchGroups[i];
+        TVector<TStringBuf> groupTokens;
+        SplitWithEscaping(groupTokens, tokens[i], ":");
+
+        Y_ENSURE(groupTokens.size() == 3);
+
+        try {
+            group.EventID = eventFactory->ClassByName(groupTokens[0]);
+        } catch (yexception& e) {
+            // Not a known event name: fall back to interpreting it as a numeric id.
+            if (!TryFromString<TEventClass>(groupTokens[0], group.EventID)) {
+                e << "\nAppend:\n" << "Cannot derive EventId from EventType: " << groupTokens[0];
+                // Rethrow the original object (keeps its dynamic type and the
+                // text appended above) instead of throwing a sliced copy.
+                throw;
+            }
+        }
+
+        group.FieldName = groupTokens[1];
+        group.ValueToMatch = UnescapeCharacters(groupTokens[2], "/:");
+    }
+}
+
+// A frame is allowed when every match group is satisfied by at least one of
+// its events: the event has the group's class and the (possibly nested,
+// dot-separated) field fully matches the group's regular expression.
+// Throws when a field of the path cannot be found in the message descriptor.
+bool TContainsEventFrameFilter::FrameAllowed(const TFrame& frame) const {
+    // Indices of the match groups that have not been satisfied yet.
+    THashSet<size_t> toMatchSet;
+    for (size_t i = 0; i < MatchGroups.size(); i++) {
+        toMatchSet.insert(i);
+    }
+
+    for (auto it = frame.GetIterator(); it.Avail(); it.Next()) {
+        TConstEventPtr event(*it);
+        // Collected separately because toMatchSet must not change while it is iterated.
+        TVector<size_t> indicesToErase;
+
+        if (!toMatchSet.empty()) {
+            const NProtoBuf::Message* const eventMessage = event->GetProto();
+
+            for (size_t groupIndex : toMatchSet) {
+                const TMatchGroup& group = MatchGroups[groupIndex];
+
+                if (event->Class != group.EventID) {
+                    continue;
+                }
+
+                // Every group resolves its field path starting from the event's
+                // root message; the descent state must not leak between groups.
+                const NProtoBuf::Message* message = eventMessage;
+                const google::protobuf::Descriptor* descriptor = message->GetDescriptor();
+                const google::protobuf::Reflection* reflection = message->GetReflection();
+
+                Y_ENSURE(descriptor);
+                Y_ENSURE(reflection);
+
+                TVector<TString> parts = StringSplitter(group.FieldName).Split('.').ToList<TString>();
+                TString lastPart = std::move(parts.back());
+                parts.pop_back();
+
+                // Walk through the intermediate sub-messages of the path.
+                for (const auto& part : parts) {
+                    auto fieldDescriptor = descriptor->FindFieldByName(part);
+                    Y_ENSURE(fieldDescriptor, "Cannot find field \"" + part + "\". Full fieldname is \"" + group.FieldName + "\".");
+
+                    message = &reflection->GetMessage(*message, fieldDescriptor);
+                    descriptor = message->GetDescriptor();
+                    reflection = message->GetReflection();
+
+                    Y_ENSURE(descriptor);
+                    Y_ENSURE(reflection);
+                }
+
+                const google::protobuf::FieldDescriptor* fieldDescriptor = descriptor->FindFieldByName(lastPart);
+                Y_ENSURE(fieldDescriptor, "Cannot find field \"" + lastPart + "\". Full fieldname is \"" + group.FieldName + "\".");
+
+                TString fieldValue = GetEventFieldAsString(message, fieldDescriptor, reflection);
+                if (re2::RE2::FullMatch(fieldValue, group.ValueToMatch)) {
+                    indicesToErase.push_back(groupIndex);
+                }
+            }
+
+            for (size_t idx : indicesToErase) {
+                toMatchSet.erase(idx);
+            }
+
+            if (toMatchSet.empty()) {
+                return true;
+            }
+        }
+    }
+
+    return toMatchSet.empty();
+}
+
+// Splits `stringToSplit` on any character of `externalCharacterSet`, treating a
+// backslash as an escape for the character that follows it. Empty tokens are
+// dropped and escape characters are left in the produced tokens. Tokens are
+// appended to `tokens` as views into `stringToSplit`.
+void SplitWithEscaping(TVector<TStringBuf>& tokens, const TStringBuf& stringToSplit, const TStringBuf& externalCharacterSet) {
+    const TString characterSet = TString::Join("\\", externalCharacterSet);
+    size_t tokenStart = 0;
+    size_t cursor = stringToSplit.find_first_of(characterSet);
+
+    while (cursor != TString::npos) {
+        if (stringToSplit[cursor] == '\\') {
+            // Step over the escaped character so it is never taken as a separator.
+            cursor++;
+        } else {
+            if (tokenStart != cursor) {
+                tokens.push_back(TStringBuf(stringToSplit, tokenStart, cursor - tokenStart));
+            }
+            tokenStart = cursor + 1;
+        }
+        cursor = stringToSplit.find_first_of(characterSet, cursor + 1);
+    }
+
+    const size_t total = stringToSplit.size();
+    if (tokenStart < total) {
+        tokens.push_back(TStringBuf(stringToSplit, tokenStart, total - tokenStart));
+    }
+}
+
+// Returns a copy of `stringToUnescape` in which every backslash that directly
+// precedes a character from `characterSet` is removed. Other backslashes are kept.
+TString UnescapeCharacters(const TStringBuf& stringToUnescape, const TStringBuf& characterSet) {
+    TStringBuilder unescaped;
+    const size_t total = stringToUnescape.size();
+    size_t copyFrom = 0;
+    size_t slash = stringToUnescape.find('\\', 0u);
+
+    while (slash != TString::npos) {
+        const size_t escaped = slash + 1;
+        if (escaped < total && characterSet.find(stringToUnescape[escaped]) != TString::npos) {
+            // Copy everything up to the backslash and resume from the escaped character.
+            unescaped << TStringBuf(stringToUnescape, copyFrom, slash - copyFrom);
+            copyFrom = escaped;
+        }
+        // The character after a backslash is never itself treated as an escape.
+        slash = stringToUnescape.find('\\', slash + 2);
+    }
+
+    if (copyFrom < total) {
+        unescaped << TStringBuf(stringToUnescape, copyFrom, total - copyFrom);
+    }
+
+    return unescaped;
+}
+
+// Converts the value of a singular scalar field of `message` to a string.
+// Supports floating point, bool, all 32/64-bit integer wire variants, string
+// and enum fields (an enum is rendered as the name of its value).
+// Throws yexception for any other field type and when an argument is null.
+TString GetEventFieldAsString(const NProtoBuf::Message* message, const google::protobuf::FieldDescriptor* fieldDescriptor, const google::protobuf::Reflection* reflection) {
+    Y_ENSURE(message);
+    Y_ENSURE(fieldDescriptor);
+    Y_ENSURE(reflection);
+
+    using TField = google::protobuf::FieldDescriptor;
+
+    // Reflection getters are selected by the C++ type of the field, so the
+    // sint/fixed/sfixed wire variants share the getter of the plain integer type.
+    switch (fieldDescriptor->type()) {
+        case TField::TYPE_DOUBLE:
+            return ToString(reflection->GetDouble(*message, fieldDescriptor));
+        case TField::TYPE_FLOAT:
+            return ToString(reflection->GetFloat(*message, fieldDescriptor));
+        case TField::TYPE_BOOL:
+            return ToString(reflection->GetBool(*message, fieldDescriptor));
+        case TField::TYPE_INT32:
+        case TField::TYPE_SINT32:
+        case TField::TYPE_SFIXED32:
+            return ToString(reflection->GetInt32(*message, fieldDescriptor));
+        case TField::TYPE_UINT32:
+        case TField::TYPE_FIXED32:
+            return ToString(reflection->GetUInt32(*message, fieldDescriptor));
+        case TField::TYPE_INT64:
+        case TField::TYPE_SINT64:
+        case TField::TYPE_SFIXED64:
+            return ToString(reflection->GetInt64(*message, fieldDescriptor));
+        case TField::TYPE_UINT64:
+        case TField::TYPE_FIXED64:
+            return ToString(reflection->GetUInt64(*message, fieldDescriptor));
+        case TField::TYPE_STRING:
+            return ToString(reflection->GetString(*message, fieldDescriptor));
+        case TField::TYPE_ENUM: {
+            const NProtoBuf::EnumValueDescriptor* enumValueDescriptor = reflection->GetEnum(*message, fieldDescriptor);
+            return ToString(enumValueDescriptor->name());
+        }
+        default:
+            throw yexception() << "GetEventFieldAsString for type " << fieldDescriptor->type_name() << " is not implemented.";
+    }
+}
+
+// Streams frames from an arbitrary input stream and positions itself on the
+// first allowed frame. StartTime_, EndTime_ and CutoffTime_ keep their zero
+// defaults here, which AllowedTimeRange() and DoNext() treat as "no limit".
+TFrameStreamer::TFrameStreamer(IInputStream& s, IEventFactory* fac, IFrameFilterRef ff)
+ : In_(&s)
+ , FrameFilter_(ff)
+ , EventFactory_(fac)
+{
+ Frame_ = FindNextFrame(&In_, EventFactory_);
+
+ SkipToAllowedFrame();
+}
+
+// Streams frames from a file loaded as a blob. FindFrames() is used to skip
+// directly to an offset suitable for the requested time window before the
+// first frame is read. CutoffTime_ is endTime + maxRequestDuration, clamped
+// so that the sum cannot wrap around.
+TFrameStreamer::TFrameStreamer(
+ const TString& fileName,
+ ui64 startTime,
+ ui64 endTime,
+ ui64 maxRequestDuration,
+ IEventFactory* fac,
+ IFrameFilterRef ff)
+ : File_(TBlob::FromFile(fileName))
+ , MemoryIn_(File_.Data(), File_.Size())
+ , In_(&MemoryIn_)
+ , StartTime_(startTime)
+ , EndTime_(endTime)
+ , CutoffTime_(endTime + Min(maxRequestDuration, Max<ui64>() - endTime))
+ , FrameFilter_(ff)
+ , EventFactory_(fac)
+{
+ In_.Skip(FindFrames(File_.AsStringBuf(), startTime, endTime, maxRequestDuration));
+ Frame_ = FindNextFrame(&In_, fac);
+ SkipToAllowedFrame();
+}
+
+// Out-of-line defaulted destructor.
+TFrameStreamer::~TFrameStreamer() = default;
+
+// True while a current frame is held.
+bool TFrameStreamer::Avail() const {
+ return Frame_.Defined();
+}
+
+// Returns the current frame; throws when the streamer has no frame left.
+const TFrame& TFrameStreamer::operator*() const {
+ Y_ENSURE(Frame_, "Frame streamer depleted");
+
+ return *Frame_;
+}
+
+// Advances to the next frame accepted by AllowedFrame().
+// Returns false when no such frame remains.
+bool TFrameStreamer::Next() {
+ DoNext();
+ SkipToAllowedFrame();
+
+ return Frame_.Defined();
+}
+
+// A frame passes when its start or its end timestamp lies inside
+// [StartTime_, EndTime_]. A zero StartTime_ disables the start check and a
+// zero EndTime_ disables the end check (either one then accepts the frame).
+bool TFrameStreamer::AllowedTimeRange(const TFrame& frame) const {
+    const auto insideWindow = [this](ui64 timestamp) {
+        return StartTime_ <= timestamp && timestamp <= EndTime_;
+    };
+
+    if (StartTime_ == 0 || insideWindow(frame.StartTime())) {
+        return true;
+    }
+
+    return EndTime_ == 0 || insideWindow(frame.EndTime());
+}
+
+// Moves to the next frame in the input regardless of filters: skips what is
+// left of the current frame's payload and looks for the next header.
+// A frame ending after a non-zero CutoffTime_ terminates the stream.
+// Returns true when a current frame is available afterwards.
+bool TFrameStreamer::DoNext() {
+    if (!Frame_) {
+        return false;
+    }
+
+    const auto unreadPayload = Frame_->Limiter_->Left();
+    In_.Skip(unreadPayload);
+    Frame_ = FindNextFrame(&In_, EventFactory_);
+
+    const bool beyondCutoff = Frame_ && CutoffTime_ > 0 && Frame_->EndTime() > CutoffTime_;
+    if (beyondCutoff) {
+        Frame_.Clear();
+    }
+
+    return Frame_.Defined();
+}
+
+namespace {
+ // Owns a buffer filled in the constructor with the full content of a source stream.
+ struct TDecodeBuffer {
+ // Reads all of `src` into a temporary buffer (initial capacity `bs`) and
+ // decodes it with the block codec named `codec` into DecodeBuffer.
+ TDecodeBuffer(const TString codec, IInputStream& src, size_t bs) {
+ TBuffer from(bs);
+
+ {
+ TBufferOutput b(from);
+ TransferData(&src, &b);
+ }
+
+ NBlockCodecs::Codec(codec)->Decode(from, DecodeBuffer);
+ }
+
+ // Copies all of `src` into DecodeBuffer without any decoding.
+ explicit TDecodeBuffer(IInputStream& src) {
+ TBufferOutput b(DecodeBuffer);
+ TransferData(&src, &b);
+ }
+
+ TBuffer DecodeBuffer;
+ };
+
+ // Input stream over data that was fully read (and optionally decoded) at
+ // construction. TDecodeBuffer is listed as the first base so that
+ // DecodeBuffer is already filled when the TBufferInput base is constructed on it.
+ class TBlockCodecStream: private TDecodeBuffer, public TBufferInput {
+ public:
+ TBlockCodecStream(const TString codec, IInputStream& src, size_t bs)
+ : TDecodeBuffer(codec, src, bs)
+ , TBufferInput(DecodeBuffer)
+ {}
+
+ explicit TBlockCodecStream(IInputStream& src)
+ : TDecodeBuffer(src)
+ , TBufferInput(DecodeBuffer)
+ {}
+ };
+}
+
+// Prepares decoding of the events of `fr` and decodes the first one.
+// The payload is decompressed according to the frame's log format:
+//   V5        - "zstd_1" block codec;
+//   V2, V3    - zlib;
+//   V4        - zlib followed by the "lz4hc" block codec.
+// Any other format (including V1) makes the constructor throw.
+// With withRawData the uncompressed payload is additionally kept in
+// UncompressedData_ so that GetRawEvent() can return views into it.
+TFrameDecoder::TFrameDecoder(const TFrame& fr, const TEventFilter* const filter, bool strict, bool withRawData)
+ : Frame_(fr)
+ , Event_(nullptr)
+ , Flt_(filter)
+ , Fac_(fr.Fac_)
+ , EndOfFrame_(new TEndOfFrameEvent(Frame_.EndTime()))
+ , Strict_(strict)
+ , WithRawData_(withRawData)
+{
+ switch (fr.LogFormat()) {
+ case COMPRESSED_LOG_FORMAT_V2:
+ case COMPRESSED_LOG_FORMAT_V3:
+ case COMPRESSED_LOG_FORMAT_V4:
+ case COMPRESSED_LOG_FORMAT_V5: {
+ const auto payload = fr.GetCompressedFrame();
+ TMemoryInput payloadInput{payload};
+
+ if (fr.LogFormat() == COMPRESSED_LOG_FORMAT_V5) {
+ Decompressor_.Reset(new TBlockCodecStream("zstd_1", payloadInput, payload.size()));
+ } else {
+ TZLibDecompress zlib(&payloadInput);
+ Decompressor_.Reset(new TBlockCodecStream(zlib));
+ if (fr.LogFormat() == COMPRESSED_LOG_FORMAT_V4) {
+ // The new stream consumes the previous one completely in its constructor,
+ // before Reset() replaces it.
+ Decompressor_.Reset(new TBlockCodecStream("lz4hc", *Decompressor_, payload.size()));
+ }
+ }
+
+ break;
+ }
+
+ default:
+ ythrow yexception() << "unsupported log format: " << fr.LogFormat() << Endl;
+ break;
+ };
+
+ if (WithRawData_) {
+ // Materialize the uncompressed payload and continue reading from the copy.
+ TBufferOutput out(UncompressedData_);
+ TLengthLimitedInput limiter(Decompressor_.Get(), fr.Framehdr.UncompressedDatalen);
+
+ TransferData(&limiter, &out);
+ Decompressor_.Reset(new TMemoryInput(UncompressedData_.data(), UncompressedData_.size()));
+ }
+
+ Limiter_.Reset(new TLengthLimitedInput(Decompressor_.Get(), fr.Framehdr.UncompressedDatalen));
+
+ Decode();
+}
+
+// Out-of-line defaulted destructor.
+TFrameDecoder::~TFrameDecoder() = default;
+
+// True while a decoded event is held.
+bool TFrameDecoder::Avail() const {
+ return HaveData();
+}
+
+// Returns the current event; throws when the decoder has no event left.
+TConstEventPtr TFrameDecoder::operator*() const {
+ Y_ENSURE(HaveData(), "Decoder depleted");
+
+ return Event_;
+}
+
+// Decodes the next event if the decoder is not depleted yet.
+// Returns true when an event is available afterwards.
+bool TFrameDecoder::Next() {
+ if (HaveData()) {
+ Decode();
+ }
+
+ return HaveData();
+}
+
+// Produces the next event into Event_ (nullptr when nothing is left).
+void TFrameDecoder::Decode() {
+ Event_ = nullptr;
+ // The `framed` flag passed to DecodeEvent is set for formats V3, V4 and V5.
+ const bool framed = (Frame_.LogFormat() == COMPRESSED_LOG_FORMAT_V3) || (Frame_.LogFormat() == COMPRESSED_LOG_FORMAT_V4 || Frame_.LogFormat() == COMPRESSED_LOG_FORMAT_V5);
+
+ size_t evBegin = 0;
+ size_t evEnd = 0;
+ if (WithRawData_)
+ evBegin = UncompressedData_.Size() - Limiter_->Left();
+
+ // Keep decoding until DecodeEvent yields an event or the payload is exhausted.
+ while (Limiter_->Left() && !(Event_ = DecodeEvent(*Limiter_, framed, Frame_.Address(), Flt_, Fac_, Strict_).Release())) {
+ }
+
+ if (WithRawData_) {
+ // Raw bytes consumed by this call, as a view into UncompressedData_.
+ evEnd = UncompressedData_.Size() - Limiter_->Left();
+ RawEventData_ = TStringBuf(UncompressedData_.data() + evBegin, UncompressedData_.data() + evEnd);
+ }
+
+ // When no event was decoded, hand out the end-of-frame event if the filter
+ // allows it. Release() gives the pointer away, so the holder no longer owns it.
+ if (!Event_ && (!Flt_ || (Flt_->EventAllowed(TEndOfFrameEvent::EventClass)))) {
+ Event_ = EndOfFrame_.Release();
+ }
+
+ if (!!Event_) {
+ Event_->FrameId = Frame_.FrameId();
+ }
+}
+
+// Returns the raw-bytes view recorded by the last Decode() call; it is only
+// assigned when the decoder was created with withRawData.
+const TStringBuf TFrameDecoder::GetRawEvent() const {
+ return RawEventData_;
+}
+
+// Streams events with timestamps in [s, e] out of the frame stream `fs`.
+// Throws when s > e. The first batch of events is loaded right away through
+// a qualified (non-virtual) call to Next().
+TEventStreamer::TEventStreamer(TFrameStream& fs, ui64 s, ui64 e, bool strongOrdering, TIntrusivePtr<TEventFilter> filter, bool losslessStrongOrdering)
+ : Frames_(fs)
+ , Start_(s)
+ , End_(e)
+ , MaxEndTimestamp_(0)
+ , Frontier_(0)
+ , StrongOrdering_(strongOrdering)
+ , LosslessStrongOrdering_(losslessStrongOrdering)
+ , EventFilter_(filter)
+{
+
+ if (Start_ > End_) {
+ ythrow yexception() << "Wrong main interval";
+ }
+
+ TEventStreamer::Next();
+}
+
+// Out-of-line defaulted destructor.
+TEventStreamer::~TEventStreamer() = default;
+
+// An event is available when the buffer is non-empty and its next event
+// is not later than the current Frontier_.
+bool TEventStreamer::Avail() const {
+ return Events_.Avail() && (*Events_)->Timestamp <= Frontier_;
+}
+
+// Returns the current event; throws when no event is available.
+TConstEventPtr TEventStreamer::operator*() const {
+ Y_ENSURE(TEventStreamer::Avail(), "Event streamer depleted");
+
+ return *Events_;
+}
+
+// Advances to the next event. First tries the already buffered events; when
+// the buffer has nothing at or before the frontier, more frames are loaded
+// until an event becomes available or the frame stream is exhausted.
+bool TEventStreamer::Next() {
+    const bool servedFromBuffer =
+        Events_.Avail() && Events_.Next() && (*Events_)->Timestamp <= Frontier_;
+    if (servedFromBuffer) {
+        return true;
+    }
+
+    while (LoadMoreEvents()) {
+        if (TEventStreamer::Avail()) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/*
+Two parameters are used in the function:
+Frontier - the moment of time up to which inclusively all the log events made their way
+ into the buffer (and might have been already extracted out of it).
+Horizon - the moment of time, that equals to Frontier + MAX_REQUEST_DURATION.
+In order to get all the log events up to the Frontier inclusively,
+ frames need to be read until "end time" of the current frame exceeds the Horizon.
+*/
+// Moves the frontier forward and transfers the events of further frames into
+// the buffer. Returns false when there are no frames left or the frontier has
+// already reached End_; throws when the current frame does not end after the
+// horizon of the previous frontier.
+bool TEventStreamer::LoadMoreEvents() {
+ if (!Frames_.Avail()) {
+ return false;
+ }
+
+ const TFrame& fr1 = *Frames_;
+ // The look-ahead window is only used with strong ordering.
+ const ui64 maxRequestDuration = (StrongOrdering_ ? MAX_REQUEST_DURATION : 0);
+
+ if (fr1.EndTime() <= Frontier_ + maxRequestDuration) {
+ ythrow yexception() << "Wrong frame stream state";
+ }
+
+ if (Frontier_ >= End_) {
+ return false;
+ }
+
+ const ui64 old_frontier = Frontier_;
+ Frontier_ = fr1.EndTime();
+
+ {
+ // Runs when this scope is left, including through the early return below.
+ Y_DEFER {
+ Events_.Reorder(StrongOrdering_);
+ };
+
+ for (; Frames_.Avail(); Frames_.Next()) {
+ const TFrame& fr2 = *Frames_;
+
+ // Frames need to start later than the Frontier.
+ if (StrongOrdering_ && fr2.StartTime() <= old_frontier) {
+ Cdbg << "Invalid frame encountered" << Endl;
+ continue;
+ }
+
+ if (fr2.EndTime() > MaxEndTimestamp_) {
+ MaxEndTimestamp_ = fr2.EndTime();
+ }
+
+ // The frame ends beyond the horizon: stop here and leave it for the next call.
+ if (fr2.EndTime() > Frontier_ + maxRequestDuration && !LosslessStrongOrdering_) {
+ return true;
+ }
+
+ // Checking for the frame to be within the main time borders.
+ if (fr2.EndTime() >= Start_ && fr2.StartTime() <= End_) {
+ TransferEvents(fr2);
+ }
+ }
+ }
+
+ // All frames were consumed: everything buffered can be handed out.
+ Frontier_ = MaxEndTimestamp_;
+
+ return true;
+}
+
+// Appends the events of `fr` that fall into [Start_, End_] to the buffer.
+// The transfer is all-or-nothing per frame: on any exception the buffer is
+// rolled back to the checkpoint taken at entry. The listed error types are
+// logged to Cdbg and swallowed; any other exception is rethrown.
+void TEventStreamer::TransferEvents(const TFrame& fr) {
+ Events_.SetCheckpoint();
+
+ try {
+ for (auto it = fr.GetIterator(EventFilter_); it.Avail(); it.Next()) {
+ TConstEventPtr ev = *it;
+
+ // An event outside its own frame's time range invalidates the whole frame.
+ if (ev->Timestamp > fr.EndTime() || ev->Timestamp < fr.StartTime()) {
+ ythrow TInvalidEventTimestamps() << "Event timestamp out of frame range";
+ }
+
+ if (ev->Timestamp >= Start_ && ev->Timestamp <= End_) {
+ Events_.Append(ev, StrongOrdering_);
+ }
+ }
+ } catch (const TInvalidEventTimestamps& err) {
+ Events_.Rollback();
+ Cdbg << "EventsTransfer error: InvalidEventTimestamps: " << err.what() << Endl;
+ } catch (const TFrameLoadError& err) {
+ Events_.Rollback();
+ Cdbg << "EventsTransfer error: " << err.what() << Endl;
+ } catch (const TEventDecoderError& err) {
+ Events_.Rollback();
+ Cdbg << "EventsTransfer error: EventDecoder error: " << err.what() << Endl;
+ } catch (const TZLibDecompressorError& err) {
+ Events_.Rollback();
+ Cdbg << "EventsTransfer error: ZLibDecompressor error: " << err.what() << Endl;
+ } catch (...) {
+ Events_.Rollback();
+ throw;
+ }
+}
+
+// Remembers the current buffer length as the rollback point.
+void TEventStreamer::TEventBuffer::SetCheckpoint() {
+ BufLen_ = Buffer_.size();
+}
+
+// Resizes the buffer back to the length recorded by the last SetCheckpoint().
+void TEventStreamer::TEventBuffer::Rollback() {
+ Buffer_.resize(BufLen_);
+}
+
+// Prepares the buffer for consumption from its back (see operator* and Next).
+// The buffer is reversed; with strong ordering it is additionally stable-sorted
+// by descending timestamp, and among equal timestamps an event with a zero
+// Class is placed before one with a non-zero Class.
+void TEventStreamer::TEventBuffer::Reorder(bool strongOrdering) {
+ SetCheckpoint();
+
+ std::reverse(Buffer_.begin(), Buffer_.end());
+
+ if (strongOrdering) {
+ StableSort(Buffer_.begin(), Buffer_.end(), [&](const auto& a, const auto& b) {
+ return (a->Timestamp > b->Timestamp) ||
+ ((a->Timestamp == b->Timestamp) && !a->Class && b->Class);
+ });
+ }
+}
+
+// Adds an event to the buffer. With strong ordering the event must not be
+// older than the timestamp of the last event removed by Next(); otherwise throws.
+void TEventStreamer::TEventBuffer::Append(TConstEventPtr ev, bool strongOrdering) {
+ // Events in buffer output must be in an ascending order.
+ Y_ENSURE(!strongOrdering || ev->Timestamp >= LastTimestamp_, "Trying to append out-of-order event");
+
+ Buffer_.push_back(std::move(ev));
+}
+
+// True while the buffer holds at least one event.
+bool TEventStreamer::TEventBuffer::Avail() const {
+ return !Buffer_.empty();
+}
+
+// Returns the event at the back of the buffer; throws when the buffer is empty.
+TConstEventPtr TEventStreamer::TEventBuffer::operator*() const {
+ Y_ENSURE(!Buffer_.empty(), "Event buffer is empty");
+
+ return Buffer_.back();
+}
+
+// Removes the current (back) event, remembering its timestamp for the
+// ordering check in Append(). Returns true when another event remains.
+bool TEventStreamer::TEventBuffer::Next() {
+    if (Buffer_.empty()) {
+        return false;
+    }
+
+    LastTimestamp_ = Buffer_.back()->Timestamp;
+    Buffer_.pop_back();
+
+    return !Buffer_.empty();
+}
diff --git a/library/cpp/eventlog/logparser.h b/library/cpp/eventlog/logparser.h
new file mode 100644
index 0000000000..f819e72589
--- /dev/null
+++ b/library/cpp/eventlog/logparser.h
@@ -0,0 +1,343 @@
+#pragma once
+
+#include <util/generic/ptr.h>
+#include <util/generic/yexception.h>
+#include <util/generic/vector.h>
+#include <util/generic/set.h>
+#include <util/generic/maybe.h>
+#include <util/memory/blob.h>
+#include <util/stream/length.h>
+#include <util/stream/mem.h>
+
+#include "eventlog_int.h"
+#include "eventlog.h"
+#include "common.h"
+
+class IInputStream;
+
+// Default look-ahead window and default time bounds. MIN_START_TIME and
+// MAX_END_TIME are one MAX_REQUEST_DURATION away from the ends of the ui64
+// range, so widening a bound by that duration cannot wrap around.
+// NOTE(review): timestamps are presumably in microseconds (60'000'000 = 60 s) — confirm.
+static const ui64 MAX_REQUEST_DURATION = 60'000'000;
+static const ui64 MIN_START_TIME = MAX_REQUEST_DURATION;
+static const ui64 MAX_END_TIME = ((ui64)-1) - MAX_REQUEST_DURATION;
+
+// Set of event classes acting either as an allow-list (enableEvents == true)
+// or as a deny-list (enableEvents == false).
+class TEventFilter: public TSet<TEventClass>, public TSimpleRefCount<TEventFilter> {
+public:
+ TEventFilter(bool enableEvents)
+ : Enable_(enableEvents)
+ {
+ }
+
+ // Adds a class to the set.
+ void AddEventClass(TEventClass cls) {
+ insert(cls);
+ }
+
+ // Allowed when membership in the set equals the mode flag: listed classes
+ // pass in allow mode, unlisted classes pass in deny mode.
+ bool EventAllowed(TEventClass cls) const {
+ bool found = (find(cls) != end());
+
+ return Enable_ == found;
+ }
+
+private:
+ bool Enable_;
+};
+
+using TEventStream = TPacketInputStream<TConstEventPtr>;
+
+// Header of a compressed log frame: the base header followed by the
+// format-specific header, plus accessors derived from them.
+struct TFrameHeader {
+ // Reads header from the stream. The caller must make sure that the
+ // sync data is present just before the stream position.
+ explicit TFrameHeader(IInputStream& in);
+
+ ui64 StartTime() const {
+ return Framehdr.StartTimestamp;
+ }
+
+ ui64 EndTime() const {
+ return Framehdr.EndTimestamp;
+ }
+
+ ui32 FrameId() const {
+ return Basehdr.FrameId;
+ }
+
+ ui64 Duration() const {
+ return EndTime() - StartTime();
+ }
+
+ // Lower 24 bits of the format field.
+ TEventLogFormat ContentFormat() const {
+ return Basehdr.Format & 0xffffff;
+ }
+
+ // Bits above the lower 24 of the format field.
+ TEventLogFormat LogFormat() const {
+ return Basehdr.Format >> 24;
+ }
+
+ // Payload length: Basehdr.Length without the TCompressedFrameHeader2 part.
+ // NOTE(review): unsigned arithmetic — the result wraps around when
+ // Basehdr.Length is smaller than sizeof(TCompressedFrameHeader2).
+ ui64 FrameLength() const {
+ return Basehdr.Length - sizeof(TCompressedFrameHeader2);
+ }
+
+ // Length including the header
+ ui64 FullLength() const {
+ return sizeof(*this) + FrameLength();
+ }
+
+ TCompressedFrameBaseHeader Basehdr;
+ TCompressedFrameHeader2 Framehdr;
+};
+
+// Thrown when a frame header cannot be loaded or validated.
+// SkipAfter is the number of bytes the reader skips before it resumes the
+// search for the next frame.
+struct TFrameLoadError: public yexception {
+ explicit TFrameLoadError(size_t skipAfter)
+ : SkipAfter(skipAfter)
+ {}
+
+ size_t SkipAfter;
+};
+
+// A frame header together with access to the frame payload, which is read
+// from the underlying stream through a length-limited input.
+class TFrame : public TFrameHeader {
+public:
+ // Reads the frame after the header has been read.
+ TFrame(IInputStream& in, TFrameHeader header, IEventFactory*);
+
+ // Payload after zlib decompression / payload as stored (checksum-verified).
+ TString GetRawFrame() const;
+ TString GetCompressedFrame() const;
+
+ ui64 Address() const { return Address_; }
+
+private:
+ // No bounds check: `index` must be a valid position in the events cache.
+ const TConstEventPtr& GetEvent(size_t index) const {
+ return EventsCache_[index];
+ }
+
+ void ClearEventsCache() const;
+
+ THolder<TLengthLimitedInput> Limiter_;
+ // Decoded events, filled by GetIterator() while the cache is empty.
+ mutable TVector<TConstEventPtr> EventsCache_;
+
+ IEventFactory* Fac_;
+ ui64 Address_;
+
+ friend class TFrameDecoder;
+ friend class TFrameStreamer;
+
+private:
+ // Iterates over the cached events of a frame, skipping those rejected by the filter.
+ class TIterator: TEventStream {
+ public:
+ TIterator(const TFrame& frame, TIntrusiveConstPtr<TEventFilter> filter);
+ ~TIterator() override = default;
+
+ bool Avail() const override {
+ return Index_ < Size_;
+ }
+
+ TConstEventPtr operator*() const override;
+ bool Next() override;
+
+ private:
+ void SkipToValidEvent();
+
+ const TFrame& Frame_;
+ size_t Size_;
+ TIntrusiveConstPtr<TEventFilter> Filter_;
+ size_t Index_;
+ };
+
+public:
+ TFrame::TIterator GetIterator(TIntrusiveConstPtr<TEventFilter> eventFilter = nullptr) const;
+};
+
+// If `in` is derived from TCountingInput, the frame's address will
+// be set according to in->Counter(). Otherwise it will be zero.
+TMaybe<TFrame> FindNextFrame(IInputStream* in, IEventFactory*);
+
+using TFrameStream = TPacketInputStream<const TFrame&>;
+
+// Ref-counted interface of a predicate that decides whether a frame is kept.
+class IFrameFilter: public TSimpleRefCount<IFrameFilter> {
+public:
+ IFrameFilter() {
+ }
+
+ virtual ~IFrameFilter() = default;
+
+ // Returns true when `frame` passes the filter.
+ virtual bool FrameAllowed(const TFrame& frame) const = 0;
+};
+
+using IFrameFilterRef = TIntrusivePtr<IFrameFilter>;
+
+// Keeps frames whose duration lies in [minFrameDuration, maxFrameDuration].
+class TDurationFrameFilter: public IFrameFilter {
+public:
+ TDurationFrameFilter(ui64 minFrameDuration, ui64 maxFrameDuration = Max<ui64>())
+ : MinDuration_(minFrameDuration)
+ , MaxDuration_(maxFrameDuration)
+ {
+ }
+
+ bool FrameAllowed(const TFrame& frame) const override {
+ return frame.Duration() >= MinDuration_ && frame.Duration() <= MaxDuration_;
+ }
+
+private:
+ const ui64 MinDuration_;
+ const ui64 MaxDuration_;
+};
+
+// Keeps only frames with the given frame id.
+class TFrameIdFrameFilter: public IFrameFilter {
+public:
+ TFrameIdFrameFilter(ui32 frameId)
+ : FrameId_(frameId)
+ {
+ }
+
+ bool FrameAllowed(const TFrame& frame) const override {
+ return frame.FrameId() == FrameId_;
+ }
+
+private:
+ const ui32 FrameId_;
+};
+
+// Keeps frames that contain, for every configured match group, an event of the
+// group's class whose field value fully matches the group's pattern.
+class TContainsEventFrameFilter: public IFrameFilter {
+public:
+ // `args` is a '/'-separated list of "EventType:FieldName:Value" groups.
+ TContainsEventFrameFilter(const TString& args, const IEventFactory* fac);
+
+ bool FrameAllowed(const TFrame& frame) const override;
+
+private:
+ struct TMatchGroup {
+ TEventClass EventID;
+ // Field name, possibly a dot-separated path into nested messages.
+ TString FieldName;
+ // Pattern the field value is matched against.
+ TString ValueToMatch;
+ };
+
+ TVector<TMatchGroup> MatchGroups;
+};
+
+// Splits `stringToSplit` on the characters of `externalCharacterSet`; a
+// backslash escapes the character after it. Non-empty tokens are appended to `tokens`.
+void SplitWithEscaping(TVector<TStringBuf>& tokens, const TStringBuf& stringToSplit, const TStringBuf& externalCharacterSet);
+
+// Removes the backslash in front of every character that belongs to `characterSet`.
+TString UnescapeCharacters(const TStringBuf& stringToUnescape, const TStringBuf& characterSet);
+
+// Renders the value of a scalar protobuf field as a string; throws for
+// field types it does not handle.
+TString GetEventFieldAsString(const NProtoBuf::Message* message, const google::protobuf::FieldDescriptor* fieldDescriptor, const google::protobuf::Reflection* reflection);
+
+// Stream of frames read either from an arbitrary input stream or from a file.
+// Only frames accepted by the time range check and the optional frame filter
+// are exposed.
+class TFrameStreamer: public TFrameStream {
+public:
+ TFrameStreamer(IInputStream&, IEventFactory* fac, IFrameFilterRef ff = nullptr);
+ TFrameStreamer(
+ const TString& fileName,
+ ui64 startTime,
+ ui64 endTime,
+ ui64 maxRequestDuration,
+ IEventFactory* fac,
+ IFrameFilterRef ff = nullptr);
+ ~TFrameStreamer() override;
+
+ bool Avail() const override;
+ const TFrame& operator*() const override;
+ bool Next() override;
+
+private:
+ // Advances to the next frame without applying any filter.
+ bool DoNext();
+ bool AllowedTimeRange(const TFrame& frame) const;
+
+ bool AllowedFrame(const TFrame& frame) const {
+ return AllowedTimeRange(frame) && (!FrameFilter_ || FrameFilter_->FrameAllowed(frame));
+ }
+
+ // Skips frames until the current one is allowed or the stream ends.
+ void SkipToAllowedFrame() {
+ if (Frame_) {
+ while (!AllowedFrame(*Frame_) && DoNext()) {
+ //do nothing
+ }
+ }
+ }
+
+ // Declaration order matters: the file constructor initializes MemoryIn_
+ // from File_ and In_ from MemoryIn_.
+ TBlob File_;
+ TMemoryInput MemoryIn_;
+ TCountingInput In_;
+ THolder<IInputStream> Stream_;
+ ui64 StartTime_ = 0;
+ ui64 EndTime_ = 0;
+ ui64 CutoffTime_ = 0;
+ TMaybe<TFrame> Frame_;
+ IFrameFilterRef FrameFilter_;
+ IEventFactory* EventFactory_;
+};
+
+// Decodes the events of a single frame one by one.
+class TFrameDecoder: TEventStream {
+public:
+ TFrameDecoder(const TFrame&, const TEventFilter* const filter, bool strict = false, bool withRawData = false);
+ ~TFrameDecoder() override;
+
+ bool Avail() const override;
+
+ TConstEventPtr operator*() const override;
+ bool Next() override;
+
+ // Raw bytes recorded for the current event; only filled with withRawData.
+ const TStringBuf GetRawEvent() const;
+
+private:
+ // Declared private and not defined here: the decoder is not copyable.
+ TFrameDecoder(const TFrameDecoder&);
+ void operator=(const TFrameDecoder&);
+
+ inline bool HaveData() const {
+ return Event_ != nullptr;
+ }
+
+ void Decode();
+
+private:
+ const TFrame& Frame_;
+ THolder<IInputStream> Decompressor_;
+ THolder<TLengthLimitedInput> Limiter_;
+ TEventPtr Event_;
+ const TEventFilter* const Flt_;
+ IEventFactory* Fac_;
+ THolder<TEvent> EndOfFrame_;
+ bool Strict_;
+ TBuffer UncompressedData_;
+ TStringBuf RawEventData_;
+ bool WithRawData_;
+};
+
+// Stream of events with timestamps in [start, end], produced by merging the
+// events of consecutive frames through an internal buffer.
+class TEventStreamer: public TEventStream {
+public:
+ TEventStreamer(TFrameStream&, ui64 start, ui64 end, bool strongOrdering, TIntrusivePtr<TEventFilter> filter, bool losslessStrongOrdering = false);
+ ~TEventStreamer() override;
+
+ bool Avail() const override;
+ TConstEventPtr operator*() const override;
+ bool Next() override;
+
+private:
+ // Buffer of pending events, consumed from the back of the vector.
+ class TEventBuffer: public TEventStream {
+ public:
+ void SetCheckpoint();
+ void Rollback();
+ void Reorder(bool strongOrdering);
+ void Append(TConstEventPtr event, bool strongOrdering);
+
+ bool Avail() const override;
+ TConstEventPtr operator*() const override;
+ bool Next() override;
+
+ private:
+ TVector<TConstEventPtr> Buffer_;
+ // Buffer length recorded by SetCheckpoint() and restored by Rollback().
+ size_t BufLen_ = 0;
+ // Timestamp of the last event removed by Next().
+ ui64 LastTimestamp_ = 0;
+ };
+
+private:
+ // Raised when an event's timestamp is outside its frame's time range.
+ struct TInvalidEventTimestamps: public yexception {
+ };
+
+ bool LoadMoreEvents();
+ void TransferEvents(const TFrame&);
+
+private:
+ TFrameStream& Frames_;
+ TEventBuffer Events_;
+
+ ui64 Start_, End_;
+ ui64 MaxEndTimestamp_;
+ // Events with timestamps up to Frontier_ are the ones handed out.
+ ui64 Frontier_;
+ bool StrongOrdering_;
+ bool LosslessStrongOrdering_;
+ TIntrusivePtr<TEventFilter> EventFilter_;
+};
diff --git a/library/cpp/eventlog/proto/events_extension.proto b/library/cpp/eventlog/proto/events_extension.proto
new file mode 100644
index 0000000000..7db1af3a59
--- /dev/null
+++ b/library/cpp/eventlog/proto/events_extension.proto
@@ -0,0 +1,22 @@
+import "google/protobuf/descriptor.proto";
+
+option go_package = "github.com/ydb-platform/ydb/library/cpp/eventlog/proto;extensions";
+option java_package = "NEventLogEventsExtension";
+
+extend google.protobuf.MessageOptions { // Custom message-level options declared by this file.
+ optional uint32 message_id = 50001; // Numeric option attached to a message definition.
+ optional string realm_name = 50002; // String option attached to a message definition.
+}
+
+message Repr { // Wrapper message that only scopes the ReprType enum.
+ enum ReprType { // Selectable representations for a field value.
+ none = 0; // No special representation.
+ as_bytes = 1; // Only for primitive types
+ as_hex = 2; // Only for primitive types
+ as_base64 = 3; // Only for 'string' and 'bytes' fields
+ };
+}
+
+extend google.protobuf.FieldOptions { // Custom field-level option declared by this file.
+ optional Repr.ReprType repr = 55003 [default = none]; // Per-field representation selector; `none` when not specified.
+}
diff --git a/library/cpp/eventlog/proto/internal.proto b/library/cpp/eventlog/proto/internal.proto
new file mode 100644
index 0000000000..8070a09685
--- /dev/null
+++ b/library/cpp/eventlog/proto/internal.proto
@@ -0,0 +1,9 @@
+option go_package = "github.com/ydb-platform/ydb/library/cpp/eventlog/proto;extensions";
+
+package NEventLogInternal;
+
+message TUnknownEvent { // Message with no fields.
+};
+
+message TEndOfFrameEvent { // Message with no fields.
+};
diff --git a/library/cpp/eventlog/proto/ya.make b/library/cpp/eventlog/proto/ya.make
new file mode 100644
index 0000000000..fbf5a6c619
--- /dev/null
+++ b/library/cpp/eventlog/proto/ya.make
@@ -0,0 +1,12 @@
+PROTO_LIBRARY() # Build description for the two .proto files listed in SRCS below.
+
+IF (NOT PY_PROTOS_FOR) # GO_PROTO tag is included only when PY_PROTOS_FOR is not set.
+ INCLUDE_TAGS(GO_PROTO)
+ENDIF()
+
+SRCS(
+ events_extension.proto
+ internal.proto
+)
+
+END()
diff --git a/library/cpp/eventlog/threaded_eventlog.cpp b/library/cpp/eventlog/threaded_eventlog.cpp
new file mode 100644
index 0000000000..67839063fb
--- /dev/null
+++ b/library/cpp/eventlog/threaded_eventlog.cpp
@@ -0,0 +1 @@
+#include "threaded_eventlog.h"
diff --git a/library/cpp/eventlog/threaded_eventlog.h b/library/cpp/eventlog/threaded_eventlog.h
new file mode 100644
index 0000000000..52382b856d
--- /dev/null
+++ b/library/cpp/eventlog/threaded_eventlog.h
@@ -0,0 +1,154 @@
+#pragma once
+
+#include "eventlog.h"
+
+#include <util/generic/string.h>
+#include <util/thread/pool.h>
+
+// Event log that forwards frames to the slave log through a thread pool.
+//
+// WriteFrame() wraps the frame into a TWrapper and enqueues it into LogSaver.
+// If the pool refuses the task, the optional overflow callback receives the
+// wrapper (it may, for example, call TWrapper::WriteFrame() synchronously).
+// An optional degradation callback is consulted before enqueueing and may ask
+// to drop the frame based on the current queue fill factor.
+class TThreadedEventLog: public TEventLogWithSlave {
+public:
+    class TWrapper;
+    using TOverflowCallback = std::function<void(TWrapper& wrapper)>;
+
+    // Verdict returned by the degradation callback.
+    enum class EDegradationResult {
+        ShouldWrite,
+        ShouldDrop,
+    };
+    // Receives queue size divided by max queue size (0 when the max is 0).
+    using TDegradationCallback = std::function<EDegradationResult(float fillFactor)>;
+
+public:
+    // parentLog: log that finally receives the frames.
+    // threadCount / queueSize: passed to the thread pool on start.
+    // cb: called when a frame cannot be enqueued (may be empty).
+    // degradationCallback: called before enqueueing (may be empty).
+    TThreadedEventLog(
+        IEventLog& parentLog,
+        size_t threadCount,
+        size_t queueSize,
+        TOverflowCallback cb,
+        TDegradationCallback degradationCallback = {})
+        : TEventLogWithSlave(parentLog)
+        , LogSaver(TThreadPoolParams().SetThreadName("ThreadedEventLog"))
+        , ThreadCount(threadCount)
+        , QueueSize(queueSize)
+        , OverflowCallback(std::move(cb))
+        , DegradationCallback(std::move(degradationCallback))
+    {
+        Init();
+    }
+
+    // Same as above, for a parent log passed as TEventLogPtr.
+    TThreadedEventLog(
+        const TEventLogPtr& parentLog,
+        size_t threadCount,
+        size_t queueSize,
+        TOverflowCallback cb,
+        TDegradationCallback degradationCallback = {})
+        : TEventLogWithSlave(parentLog)
+        , LogSaver(TThreadPoolParams().SetThreadName("ThreadedEventLog"))
+        , ThreadCount(threadCount)
+        , QueueSize(queueSize)
+        , OverflowCallback(std::move(cb))
+        , DegradationCallback(std::move(degradationCallback))
+    {
+        Init();
+    }
+
+    // One thread, queue size 0, no callbacks.
+    TThreadedEventLog(IEventLog& parentLog)
+        : TThreadedEventLog(parentLog, 1, 0, TOverflowCallback())
+    {
+    }
+
+    TThreadedEventLog(const TEventLogPtr& parentLog)
+        : TThreadedEventLog(parentLog, 1, 0, TOverflowCallback())
+    {
+    }
+
+    ~TThreadedEventLog() override {
+        // Destructors must not throw: stopping the pool is best-effort here.
+        try {
+            LogSaver.Stop();
+        } catch (...) {
+        }
+    }
+
+    void ReopenLog() override {
+        TEventLogWithSlave::ReopenLog();
+    }
+
+    // Stops the pool first, then closes the underlying log.
+    void CloseLog() override {
+        LogSaver.Stop();
+        TEventLogWithSlave::CloseLog();
+    }
+
+    // Takes the contents of `buffer` (it is swapped into the task) and
+    // schedules the write. The frame is silently dropped when the degradation
+    // callback returns ShouldDrop.
+    void WriteFrame(TBuffer& buffer,
+                    TEventTimestamp startTimestamp,
+                    TEventTimestamp endTimestamp,
+                    TWriteFrameCallbackPtr writeFrameCallback = nullptr,
+                    TLogRecord::TMetaFlags metaFlags = {}) override {
+        // The fill factor has a single consumer, so the queue is only
+        // inspected when a degradation callback is actually installed.
+        if (DegradationCallback) {
+            float fillFactor = 0.0f;
+            if (Y_LIKELY(LogSaver.GetMaxQueueSize() > 0)) {
+                fillFactor = static_cast<float>(LogSaver.Size()) / LogSaver.GetMaxQueueSize();
+            }
+            if (Y_UNLIKELY(DegradationCallback(fillFactor) == EDegradationResult::ShouldDrop)) {
+                return;
+            }
+        }
+
+        THolder<TWrapper> wrapped(new TWrapper(buffer, startTimestamp, endTimestamp, Slave(),
+                                               std::move(writeFrameCallback), std::move(metaFlags)));
+
+        if (LogSaver.Add(wrapped.Get())) {
+            // Ownership passed to the pool; TWrapper::Process() deletes the task.
+            Y_UNUSED(wrapped.Release());
+        } else if (OverflowCallback) {
+            // Not enqueued: the holder still owns the wrapper and frees it on return.
+            OverflowCallback(*wrapped);
+        }
+    }
+
+private:
+    void Init() {
+        LogSaver.Start(ThreadCount, QueueSize);
+    }
+
+public:
+    // Queue task carrying one frame together with its destination log.
+    class TWrapper: public IObjectInQueue {
+    public:
+        // Steals the contents of `buffer` via Swap; `slave` is stored as a
+        // pointer and must stay alive until the task is processed.
+        TWrapper(TBuffer& buffer,
+                 TEventTimestamp startTimestamp,
+                 TEventTimestamp endTimestamp,
+                 IEventLog& slave,
+                 TWriteFrameCallbackPtr writeFrameCallback = nullptr,
+                 TLogRecord::TMetaFlags metaFlags = {})
+            : StartTimestamp(startTimestamp)
+            , EndTimestamp(endTimestamp)
+            , Slave(&slave)
+            , WriteFrameCallback(std::move(writeFrameCallback))
+            , MetaFlags(std::move(metaFlags))
+        {
+            Buffer.Swap(buffer);
+        }
+
+        // Pool entry point: writes the frame and destroys this task.
+        void Process(void*) override {
+            THolder<TWrapper> holder(this);
+
+            WriteFrame();
+        }
+
+        // Writes the stored frame to the slave log. MetaFlags are moved out,
+        // so a second call passes moved-from flags.
+        void WriteFrame() {
+            Slave->WriteFrame(Buffer, StartTimestamp, EndTimestamp, WriteFrameCallback, std::move(MetaFlags));
+        }
+
+    private:
+        TBuffer Buffer;
+        TEventTimestamp StartTimestamp;
+        TEventTimestamp EndTimestamp;
+        IEventLog* Slave;
+        TWriteFrameCallbackPtr WriteFrameCallback;
+        TLogRecord::TMetaFlags MetaFlags;
+    };
+
+private:
+    TThreadPool LogSaver;
+    const size_t ThreadCount;
+    const size_t QueueSize;
+    const TOverflowCallback OverflowCallback;
+    const TDegradationCallback DegradationCallback;
+};
diff --git a/library/cpp/eventlog/ya.make b/library/cpp/eventlog/ya.make
new file mode 100644
index 0000000000..fbbc1eff00
--- /dev/null
+++ b/library/cpp/eventlog/ya.make
@@ -0,0 +1,29 @@
+LIBRARY() # Build description for the eventlog library.
+
+PEERDIR( # Libraries this module depends on.
+ library/cpp/blockcodecs
+ library/cpp/eventlog/proto
+ library/cpp/json
+ library/cpp/logger
+ library/cpp/protobuf/json
+ library/cpp/streams/growing_file_input
+ library/cpp/string_utils/base64
+ contrib/libs/re2
+)
+
+SRCS( # Sources of the library.
+ common.h
+ evdecoder.cpp
+ event_field_output.cpp
+ event_field_printer.cpp
+ eventlog.cpp
+ eventlog_int.cpp
+ iterator.cpp
+ logparser.cpp
+ threaded_eventlog.cpp
+)
+
+GENERATE_ENUM_SERIALIZATION(eventlog.h) # Enum serialization is generated for these two headers.
+GENERATE_ENUM_SERIALIZATION(eventlog_int.h)
+
+END()
diff --git a/library/cpp/fieldcalc/field_calc.cpp b/library/cpp/fieldcalc/field_calc.cpp
new file mode 100644
index 0000000000..1066b5b5e6
--- /dev/null
+++ b/library/cpp/fieldcalc/field_calc.cpp
@@ -0,0 +1,1136 @@
+#include <cstdio>
+
+#include <util/str_stl.h>
+#include <util/string/subst.h>
+#include <util/string/util.h>
+#include <util/string/cast.h>
+#include <util/stream/printf.h>
+
+#include "field_calc_int.h"
+
+using namespace std;
+
+enum Operators { // Operator codes of the expression calculator; the tokens noted below are those recognized by get_oper().
+ OP_ADD, // "+"
+ OP_SUBSTRACT, // "-" in binary position
+ OP_MULTIPLY, // "*"
+ OP_DIVIDE, // "/"
+ OP_MODULUS, // "%"
+ OP_REGEXP, // "=~" (parsed, but calc_op rejects it as unsupported)
+ OP_REGEXP_NOT, // "!~" (parsed, but calc_op rejects it as unsupported)
+ OP_LEFT_SHIFT, // "<<"
+ OP_RIGHT_SHIFT, // ">>"
+ OP_EQUAL, // "=="
+ OP_NOT_EQUAL, // "!="
+ OP_LESS, // "<"
+ OP_LESS_OR_EQUAL, // "<="
+ OP_GREATER, // ">"
+ OP_GREATER_OR_EQUAL, // ">="
+ OP_XOR, // "^"
+ OP_BITWISE_OR, // "|"
+ OP_BITWISE_AND, // "&"
+ OP_LOGICAL_OR, // "||"
+ OP_LOGICAL_AND, // "&&"
+ OP_UNARY_NOT, // "!"
+ OP_UNARY_COMPLEMENT, // "~"
+ OP_UNARY_MINUS, // "-" in unary position
+ OP_LOG, // "#LOG#"
+ OP_LOG10, // "#LOG10#"
+ OP_ROUND, // "#ROUND#"
+ OP_ASSIGN, // "="
+ OP_QUESTION, // "?"
+ OP_COLON, // ":"
+
+ OP_UNKNOWN, // Returned by get_oper() for an unrecognized token; has priority 0 in get_op_prio().
+};
+
+struct calc_op;
+
+struct calc_elem { // One slot of the parser stack used by TFieldCalculatorBase::Compile.
+ dump_item item; // Operand stored in this slot.
+ char oper; // Operators value parsed just before this operand, stored as char.
+ int op_prio; // Effective priority of `oper` (base priority plus bracket/unary adjustments made in Compile).
+};
+
+// One compiled operation of the calculator: `Left Oper Right`, or `Oper Right`
+// for unary operators. The outcome of the last evaluation is kept in `result`.
+struct calc_op {
+    dump_item Left, Right;
+    char Oper;
+    bool force_long;  // operands are converted with to_long() before the operation
+    bool unary;       // the left operand is ignored
+    bool is_variable; // operation backs a local variable; evaluated once until reset
+    bool string_op;   // both operands are strings and Oper is a comparison // TODO -> bitop
+
+    // for local vars
+    mutable bool calculated;
+    mutable eval_res_type result;
+
+    // Builds the operation from two adjacent parser stack slots; the operator
+    // is the one attached to the right slot. Throws yexception for regexp
+    // operators (they are not supported).
+    calc_op(calc_elem& left, calc_elem& right)
+        : Left(left.item)
+        , Right(right.item)
+        , Oper(right.oper)
+        , is_variable(false)
+        , calculated(false)
+        , result(false)
+    {
+        switch (Oper) {
+            case OP_XOR:
+            case OP_BITWISE_OR:
+            case OP_BITWISE_AND:
+            case OP_LOGICAL_OR:
+            case OP_LOGICAL_AND:
+            case OP_UNARY_NOT:
+            case OP_UNARY_COMPLEMENT:
+            case OP_LEFT_SHIFT:
+            case OP_RIGHT_SHIFT:
+            case OP_MODULUS:
+                force_long = true;
+                break;
+            default:
+                force_long = false;
+                break;
+        }
+
+        switch (Oper) {
+            case OP_UNARY_NOT:
+            case OP_UNARY_COMPLEMENT:
+            case OP_UNARY_MINUS:
+            case OP_LOG:
+            case OP_LOG10:
+            case OP_ROUND:
+                unary = true;
+                break;
+            default:
+                unary = false;
+                break;
+        }
+
+        const bool is_regexp = Oper == OP_REGEXP || Oper == OP_REGEXP_NOT;
+        const bool is_comparison = Oper == OP_EQUAL || Oper == OP_NOT_EQUAL ||
+                                   Oper == OP_LESS || Oper == OP_LESS_OR_EQUAL ||
+                                   Oper == OP_GREATER || Oper == OP_GREATER_OR_EQUAL;
+        string_op = IsStringType(Left.type) && IsStringType(Right.type) && (is_regexp || is_comparison);
+
+        if (is_regexp) {
+            if (!string_op)
+                ythrow yexception() << "calc-expr: regexp requested for non-strings";
+            ythrow yexception() << "calc-expr: regexps currently not supported";
+        }
+    }
+
+    // Evaluates the operation into `result`. An operation marked as a variable
+    // is computed only while `calculated` is false.
+    Y_FORCE_INLINE void eval(const char** dd) const {
+        if (is_variable && calculated) {
+            return;
+        }
+        do_eval(dd);
+        if (is_variable) {
+            calculated = true;
+        }
+    }
+
+private:
+    Y_FORCE_INLINE void do_eval(const char** dd) const;
+};
+
+// Computes this operation for the records in `dd` and stores the outcome in
+// `result`.
+//  - OP_QUESTION yields the right operand when the left one is non-zero and a
+//    null result otherwise; OP_COLON yields the right operand only when the
+//    left result is null. Together they implement `a ? b : c`.
+//  - String comparisons are done on the operands' string views.
+//  - Everything else works on evaluated numeric operands; operators flagged
+//    `force_long` first convert both operands with to_long().
+void calc_op::do_eval(const char** dd) const {
+    // Unary operators have no meaningful left operand, so it is not evaluated.
+    eval_res_type left1 = unary ? (eval_res_type) false : Left.eval(dd);
+    if (Oper == OP_QUESTION) {
+        left1.to_long();
+        if (left1.res_long) {
+            result = Right.eval(dd);
+        } else {
+            result = eval_res_type(); // null
+        }
+        return;
+    } else if (Oper == OP_COLON) {
+        if (left1.is_null()) {
+            result = Right.eval(dd);
+        } else {
+            result = left1;
+        }
+        return;
+    }
+
+    if (Y_UNLIKELY(string_op)) {
+        TStringBuf left2 = Left.GetStrBuf(dd);
+        TStringBuf right2 = Right.GetStrBuf(dd);
+        switch (Oper) {
+            case OP_REGEXP:
+                result = false;
+                break;
+            case OP_REGEXP_NOT:
+                result = false;
+                break;
+            case OP_EQUAL:
+                result = left2 == right2;
+                break;
+            case OP_NOT_EQUAL:
+                result = left2 != right2;
+                break;
+            case OP_LESS:
+                result = left2 < right2;
+                break;
+            case OP_LESS_OR_EQUAL:
+                result = left2 <= right2;
+                break;
+            case OP_GREATER:
+                result = left2 > right2;
+                break;
+            case OP_GREATER_OR_EQUAL:
+                result = left2 >= right2;
+                break;
+            default:
+                assert(false);
+        }
+        return;
+    }
+
+    eval_res_type right1 = Right.eval(dd);
+    if (force_long) { // logical ops will be all long
+        left1.to_long();
+        right1.to_long();
+    }
+    switch (Oper) {
+        case OP_ADD:
+            result = left1 + right1;
+            break;
+        case OP_SUBSTRACT:
+            result = left1 - right1;
+            break;
+        case OP_MULTIPLY:
+            result = left1 * right1;
+            break;
+        case OP_DIVIDE:
+            result = left1 / right1;
+            break;
+        case OP_MODULUS:
+            // Guard the divisor: `x % 0` is undefined behaviour. A zero
+            // dividend still yields 0, as it did before.
+            result = right1.res_long ? left1.res_long % right1.res_long : 0;
+            break;
+        case OP_LEFT_SHIFT:
+            // NOTE(review): the shift count is not range-checked.
+            result = left1.res_long << right1.res_long;
+            break;
+        case OP_RIGHT_SHIFT:
+            result = left1.res_long >> right1.res_long;
+            break;
+        case OP_EQUAL:
+            result = left1 == right1;
+            break;
+        case OP_NOT_EQUAL:
+            result = !(left1 == right1);
+            break;
+        case OP_LESS:
+            result = left1 < right1;
+            break;
+        case OP_LESS_OR_EQUAL:
+            result = !(right1 < left1);
+            break; // <=
+        case OP_GREATER:
+            result = right1 < left1;
+            break;
+        case OP_GREATER_OR_EQUAL:
+            result = !(left1 < right1);
+            break; // >=
+        case OP_XOR:
+            result = left1.res_long ^ right1.res_long;
+            break;
+        case OP_BITWISE_OR:
+            result = left1.res_long | right1.res_long;
+            break;
+        case OP_BITWISE_AND:
+            result = left1.res_long & right1.res_long;
+            break;
+        case OP_LOGICAL_OR:
+            result = left1.res_long || right1.res_long;
+            break;
+        case OP_LOGICAL_AND:
+            result = left1.res_long && right1.res_long;
+            break;
+        case OP_UNARY_NOT:
+            result = !right1.res_long;
+            break;
+        case OP_UNARY_COMPLEMENT:
+            result = ~right1.res_long;
+            break;
+        case OP_UNARY_MINUS:
+            result = Minus(right1);
+            break;
+        case OP_LOG:
+            result = Log(right1);
+            break;
+        case OP_LOG10:
+            result = Log10(right1);
+            break;
+        case OP_ROUND:
+            result = Round(right1);
+            break;
+        default:
+            assert(false);
+    }
+}
+
+namespace {
+    // File-local fprintf overloads (a copy-paste of fcat(TString)) that let
+    // dump_item::print write to a TString or an IOutputStream with the same
+    // call syntax it uses for FILE*.
+    // We don't want them to be too slow, yet we don't want to slow down our
+    // main functionality, libc fprintf, even a little.
+
+    // Appends the formatted text to *s; returns what Printf reports.
+    size_t Y_PRINTF_FORMAT(2, 3) fprintf(TString* s, const char* c, ...) {
+        va_list args;
+        va_start(args, c);
+        TStringOutput sink(*s);
+        const size_t written = Printf(sink, c, args);
+        va_end(args);
+        return written;
+    }
+
+    // Writes the formatted text to *s; returns what Printf reports.
+    size_t Y_PRINTF_FORMAT(2, 3) fprintf(IOutputStream* s, const char* c, ...) {
+        va_list args;
+        va_start(args, c);
+        const size_t written = Printf(*s, c, args);
+        va_end(args);
+        return written;
+    }
+}
+
+// Prints the value described by this item to `p` (FILE*, TString* or
+// IOutputStream*, via the matching fprintf overload).
+//
+// `dd` is an array of record pointers; the record used is dd[pack_id]. Field
+// items read raw memory at `field_offset` inside that record, function items
+// call a member/external function on the record.
+//
+// DIT_FAKE_ITEM, DIT_MATH_RESULT and DIT_NAME are not printable here (math
+// results must go through eval(), the name is printed by the caller); they
+// only trip an assert.
+template <class TOut>
+void dump_item::print(TOut* p, const char** dd) const {
+    const char* d = dd[pack_id];
+    const fake* f = reinterpret_cast<const fake*>(d);
+
+    switch (type) {
+        case DIT_FAKE_ITEM:
+            assert(false);
+            break;
+        case DIT_MATH_RESULT:
+            assert(false);
+            break; // must call eval instead
+        case DIT_NAME:
+            assert(false);
+            break; // no op
+
+        case DIT_BOOL_FIELD:
+            fprintf(p, *(bool*)(d + field_offset) ? "true" : "false");
+            break;
+        case DIT_UI8_FIELD:
+            fprintf(p, "%u", *(ui8*)(d + field_offset));
+            break;
+        case DIT_UI16_FIELD:
+            fprintf(p, "%u", *(ui16*)(d + field_offset));
+            break;
+        case DIT_UI32_FIELD:
+            fprintf(p, "%u", *(ui32*)(d + field_offset));
+            break;
+        case DIT_I64_FIELD:
+            fprintf(p, "%" PRId64, *(i64*)(d + field_offset));
+            break;
+        case DIT_UI64_FIELD:
+            fprintf(p, "%" PRIu64, *(ui64*)(d + field_offset));
+            break;
+        case DIT_FLOAT_FIELD:
+            fprintf(p, "%.4f", *(float*)(d + field_offset));
+            break;
+        case DIT_DOUBLE_FIELD:
+            fprintf(p, "%.7f", *(double*)(d + field_offset));
+            break;
+        case DIT_TIME_T32_FIELD:
+            fprintf(p, "%ld", (long)*(time_t32*)(d + field_offset));
+            break;
+        case DIT_PF16UI32_FIELD:
+            fprintf(p, "%u", (ui32) * (pf16ui32*)(d + field_offset));
+            break;
+        case DIT_PF16FLOAT_FIELD:
+            fprintf(p, "%.4f", (float)*(pf16float*)(d + field_offset));
+            break;
+        case DIT_SF16FLOAT_FIELD:
+            fprintf(p, "%.4f", (float)*(sf16float*)(d + field_offset));
+            break;
+        case DIT_STRING_FIELD:
+            fprintf(p, "%s", (d + field_offset));
+            break;
+
+        case DIT_LONG_CONST:
+            fprintf(p, "%ld", long_const);
+            break;
+        case DIT_FLOAT_CONST:
+            fprintf(p, "%.4f", float_const);
+            break;
+        case DIT_STR_CONST:
+            fprintf(p, "%.*s", (int)the_buf.size(), the_buf.data());
+            break;
+
+        case DIT_INT_FUNCTION:
+            fprintf(p, "%d", (f->*int_fn)());
+            break;
+        case DIT_FLOAT_FUNCTION:
+            fprintf(p, "%.4f", (f->*float_fn)());
+            break;
+        case DIT_BOOL_FUNCTION:
+            fprintf(p, "%d", (f->*bool_fn)());
+            break;
+        case DIT_STR_FUNCTION:
+            fprintf(p, "%s", (f->*str_fn)());
+            break;
+        case DIT_STRBUF_FUNCTION:
+            // the_buf serves as scratch storage for the callee.
+            the_buf.clear();
+            fprintf(p, "%s", (f->*strbuf_2_fn)(the_buf, nullptr));
+            break;
+
+        case DIT_UI8_EXT_FUNCTION:
+            fprintf(p, "%u", (*ui8_ext_fn)(f));
+            break;
+        case DIT_UI16_EXT_FUNCTION:
+            fprintf(p, "%u", (*ui16_ext_fn)(f));
+            break;
+        case DIT_UI32_EXT_FUNCTION:
+            fprintf(p, "%u", (*ui32_ext_fn)(f));
+            break;
+        case DIT_UI64_EXT_FUNCTION:
+            fprintf(p, "%" PRIu64, (*ui64_ext_fn)(f));
+            break;
+
+        case DIT_UI8_ENUM_EQ:
+            fprintf(p, "%d", *(ui8*)(d + field_offset) == enum_val);
+            break;
+        case DIT_UI8_ENUM_SET:
+            fprintf(p, "%d", !!(*(ui8*)(d + field_offset) & enum_val));
+            break;
+
+        case DIT_UI16_ENUM_EQ:
+            fprintf(p, "%d", *(ui16*)(d + field_offset) == enum_val);
+            break;
+        case DIT_UI16_ENUM_SET:
+            fprintf(p, "%d", !!(*(ui16*)(d + field_offset) & enum_val));
+            break;
+
+        case DIT_UI32_ENUM_EQ:
+            fprintf(p, "%d", *(ui32*)(d + field_offset) == enum_val);
+            break;
+        case DIT_UI32_ENUM_SET:
+            fprintf(p, "%d", !!(*(ui32*)(d + field_offset) & enum_val));
+            break;
+
+        case DIT_INT_ENUM_FUNCTION_EQ:
+            fprintf(p, "%d", (ui32)(f->*int_enum_fn)() == enum_val);
+            break;
+        case DIT_INT_ENUM_FUNCTION_SET:
+            fprintf(p, "%d", !!(ui32)((f->*int_enum_fn)() & enum_val));
+            break;
+
+        case DIT_BOOL_FUNC_FIXED_STR:
+            fprintf(p, "%u", (ui32)(f->*bool_strbuf_fn)(the_buf));
+            break;
+        case DIT_UI8_FUNC_FIXED_STR:
+            fprintf(p, "%u", (ui32)(f->*ui8_strbuf_fn)(the_buf));
+            break;
+        case DIT_UI16_FUNC_FIXED_STR:
+            fprintf(p, "%u", (ui32)(f->*ui16_strbuf_fn)(the_buf));
+            break;
+        case DIT_UI32_FUNC_FIXED_STR:
+            fprintf(p, "%u", (f->*ui32_strbuf_fn)(the_buf));
+            break;
+        case DIT_I64_FUNC_FIXED_STR:
+            fprintf(p, "%" PRId64, (f->*i64_strbuf_fn)(the_buf));
+            break;
+        case DIT_UI64_FUNC_FIXED_STR:
+            fprintf(p, "%" PRIu64, (f->*ui64_strbuf_fn)(the_buf));
+            break;
+        case DIT_FLOAT_FUNC_FIXED_STR:
+            fprintf(p, "%.4f", (f->*float_strbuf_fn)(the_buf));
+            break;
+        case DIT_DOUBLE_FUNC_FIXED_STR:
+            fprintf(p, "%.7f", (f->*double_strbuf_fn)(the_buf));
+            break;
+
+        case DIT_RESOLVE_BY_NAME: {
+            // Print with an explicit length, exactly like DIT_STR_CONST: the
+            // resolved value is only known to expose data()/size(), so it must
+            // not be assumed to be NUL-terminated.
+            const auto& resolved = (f->*resolve_fn)(the_buf);
+            fprintf(p, "%.*s", (int)resolved.size(), resolved.data());
+            break;
+        }
+
+        default:
+            assert(false);
+            break;
+    }
+}
+
+// instantiate explicitly, just in case
+template void dump_item::print<FILE>(FILE* p, const char** dd) const;
+template void dump_item::print<TString>(TString* p, const char** dd) const;
+template void dump_item::print<IOutputStream>(IOutputStream* p, const char** dd) const;
+
+// Returns the string value of a string-typed item for the record dd[pack_id].
+// Only string fields, string constants, string functions and by-name
+// resolution are handled; any other type trips an assert and yields an empty
+// TStringBuf.
+TStringBuf dump_item::GetStrBuf(const char** dd) const {
+    const char* const record = dd[pack_id];
+    const fake* const obj = reinterpret_cast<const fake*>(record);
+
+    if (type == DIT_STRING_FIELD) {
+        return record + field_offset;
+    }
+    if (type == DIT_STR_CONST) {
+        return the_buf;
+    }
+    if (type == DIT_STR_FUNCTION) {
+        return (obj->*str_fn)();
+    }
+    if (type == DIT_STRBUF_FUNCTION) {
+        // the_buf serves as scratch storage for the callee.
+        the_buf.clear();
+        return (obj->*strbuf_2_fn)(the_buf, nullptr);
+    }
+    if (type == DIT_RESOLVE_BY_NAME) {
+        return (obj->*resolve_fn)(the_buf);
+    }
+
+    assert(false);
+    return TStringBuf();
+}
+
+// recursive
+eval_res_type dump_item::eval(const char** dd) const { // Returns this item's numeric value for record dd[pack_id]; recursion happens through DIT_MATH_RESULT.
+ const char* d = dd[pack_id];
+ const fake* f = reinterpret_cast<const fake*>(d);
+
+ switch (type) {
+ case DIT_FAKE_ITEM:
+ assert(false);
+ return (long int)0;
+ case DIT_MATH_RESULT: // evaluate the referenced calc_op and return its stored result
+ this->op->eval(dd);
+ return this->op->result;
+ case DIT_NAME:
+ assert(false);
+ return (long int)0;
+
+ case DIT_BOOL_FIELD: // raw fields: read the value at field_offset inside the record
+ return (ui32) * (bool*)(d + field_offset);
+ case DIT_UI8_FIELD:
+ return (ui32) * (ui8*)(d + field_offset);
+ case DIT_UI16_FIELD:
+ return (ui32) * (ui16*)(d + field_offset);
+ case DIT_UI32_FIELD:
+ return (ui32) * (ui32*)(d + field_offset);
+ case DIT_I64_FIELD:
+ return (long)*(i64*)(d + field_offset); // TODO: 64 bit support in calculator?
+ case DIT_UI64_FIELD:
+ return (long)*(ui64*)(d + field_offset); // TODO: 64 bit support in calculator?
+ case DIT_FLOAT_FIELD:
+ return (float)*(float*)(d + field_offset);
+ case DIT_DOUBLE_FIELD:
+ return *(double*)(d + field_offset);
+ case DIT_TIME_T32_FIELD:
+ return (long)*(time_t32*)(d + field_offset);
+ case DIT_PF16UI32_FIELD:
+ return (ui32) * (pf16ui32*)(d + field_offset);
+ case DIT_PF16FLOAT_FIELD:
+ return (float)*(pf16float*)(d + field_offset);
+ case DIT_SF16FLOAT_FIELD:
+ return (float)*(sf16float*)(d + field_offset);
+ case DIT_STRING_FIELD:
+ return !!d[field_offset]; // we don't have any string functions, just 0 if empty
+
+ case DIT_LONG_CONST:
+ return long_const;
+ case DIT_FLOAT_CONST:
+ return float_const;
+ case DIT_STR_CONST:
+ return !!the_buf; // string constant -> 0/1 (non-empty)
+
+ case DIT_INT_FUNCTION:
+ return (long)(f->*int_fn)();
+ case DIT_FLOAT_FUNCTION:
+ return (float)(f->*float_fn)();
+ case DIT_BOOL_FUNCTION:
+ return (long)(f->*bool_fn)();
+ case DIT_STR_FUNCTION:
+ return !!*(f->*str_fn)(); // string -> int
+ case DIT_STRBUF_FUNCTION:
+ the_buf.clear();
+ return !!*(f->*strbuf_2_fn)(the_buf, nullptr); // string -> 0/1
+
+ case DIT_UI8_EXT_FUNCTION:
+ return (ui32)(*ui8_ext_fn)(f);
+ case DIT_UI16_EXT_FUNCTION:
+ return (ui32)(*ui16_ext_fn)(f);
+ case DIT_UI32_EXT_FUNCTION:
+ return (ui32)(*ui32_ext_fn)(f);
+ case DIT_UI64_EXT_FUNCTION:
+ return (long)(*ui64_ext_fn)(f); // TODO: 64 bit support in calculator?
+
+ case DIT_UI8_ENUM_EQ: // *_ENUM_EQ: field equals enum_val; *_ENUM_SET: field has any bit of enum_val set
+ return (ui32)(*(ui8*)(d + field_offset) == enum_val);
+ case DIT_UI8_ENUM_SET:
+ return !!(ui32)(*(ui8*)(d + field_offset) & enum_val);
+
+ case DIT_UI16_ENUM_EQ:
+ return (ui32)(*(ui16*)(d + field_offset) == enum_val);
+ case DIT_UI16_ENUM_SET:
+ return !!(ui32)(*(ui16*)(d + field_offset) & enum_val);
+
+ case DIT_UI32_ENUM_EQ:
+ return (ui32)(*(ui32*)(d + field_offset) == enum_val);
+ case DIT_UI32_ENUM_SET:
+ return !!(ui32)(*(ui32*)(d + field_offset) & enum_val);
+
+ case DIT_INT_ENUM_FUNCTION_EQ:
+ return (ui32)((ui32)(f->*int_enum_fn)() == enum_val);
+ case DIT_INT_ENUM_FUNCTION_SET:
+ return !!(ui32)((ui32)(f->*int_enum_fn)() & enum_val);
+
+ case DIT_BOOL_FUNC_FIXED_STR: // *_FUNC_FIXED_STR: member function called with the_buf as its argument
+ return (ui32)(f->*bool_strbuf_fn)(the_buf);
+ case DIT_UI8_FUNC_FIXED_STR:
+ return (ui32)(f->*ui8_strbuf_fn)(the_buf);
+ case DIT_UI16_FUNC_FIXED_STR:
+ return (ui32)(f->*ui16_strbuf_fn)(the_buf);
+ case DIT_UI32_FUNC_FIXED_STR:
+ return (ui32)(f->*ui32_strbuf_fn)(the_buf);
+ case DIT_I64_FUNC_FIXED_STR:
+ return (long)(f->*i64_strbuf_fn)(the_buf);
+ case DIT_UI64_FUNC_FIXED_STR:
+ return (long)(f->*ui64_strbuf_fn)(the_buf);
+ case DIT_FLOAT_FUNC_FIXED_STR:
+ return (float)(f->*float_strbuf_fn)(the_buf);
+ case DIT_DOUBLE_FUNC_FIXED_STR:
+ return (double)(f->*double_strbuf_fn)(the_buf);
+
+ case DIT_RESOLVE_BY_NAME:
+ return !!(f->*resolve_fn)(the_buf); // resolved string -> 0/1
+
+ default:
+ assert(false);
+ break;
+ }
+
+ // unreached
+ return eval_res_type(false);
+}
+
+// Moves field_offset forward to element `arrind` of an array field by adding
+// arrind * sizeof(element). Items that are not raw fields of a fixed-size
+// element type are left untouched.
+void dump_item::set_arrind(int arrind) {
+    size_t elem_size;
+    switch (type) {
+        case DIT_BOOL_FIELD:
+            elem_size = sizeof(bool);
+            break;
+        case DIT_UI8_FIELD:
+            elem_size = sizeof(ui8);
+            break;
+        case DIT_UI16_FIELD:
+            elem_size = sizeof(ui16);
+            break;
+        case DIT_UI32_FIELD:
+            elem_size = sizeof(ui32);
+            break;
+        case DIT_I64_FIELD:
+            elem_size = sizeof(i64);
+            break;
+        case DIT_UI64_FIELD:
+            elem_size = sizeof(ui64);
+            break;
+        case DIT_FLOAT_FIELD:
+            elem_size = sizeof(float);
+            break;
+        case DIT_DOUBLE_FIELD:
+            elem_size = sizeof(double);
+            break;
+        case DIT_TIME_T32_FIELD:
+            elem_size = sizeof(time_t32);
+            break;
+        case DIT_PF16UI32_FIELD:
+            elem_size = sizeof(pf16ui32);
+            break;
+        case DIT_PF16FLOAT_FIELD:
+            elem_size = sizeof(pf16float);
+            break;
+        case DIT_SF16FLOAT_FIELD:
+            elem_size = sizeof(sf16float);
+            break;
+        default:
+            // Not an indexable field type: nothing to adjust.
+            return;
+    }
+    field_offset += arrind * elem_size;
+}
+
+static str_spn FieldNameChars("a-zA-Z0-9_$", true); // Characters making up field names and numbers; get_field() uses cbrk() to find where a token ends. NOTE(review): meaning of the `true` flag is defined by str_spn - confirm.
+static str_spn MathOpChars("-+=*%/&|<>()!~^?:#", true); // Operator and bracket characters; Compile() treats an argument containing any of them as an expression.
+static str_spn SpaceChars("\t\n\r ", true); // Whitespace skipped by get_field() after a token.
+
+// Out-of-line special members; both do nothing beyond member-wise work.
+TFieldCalculatorBase::TFieldCalculatorBase() = default;
+
+TFieldCalculatorBase::~TFieldCalculatorBase() = default;
+
+// Looks `name` up (case-insensitively) in every registered item list.
+// On success copies the matching item into `it`, stamps it with the index of
+// the list it came from (pack_id) and returns true; returns false otherwise.
+bool TFieldCalculatorBase::item_by_name(dump_item& it, const char* name) const {
+    for (size_t pack = 0; pack < named_dump_items.size(); pack++) {
+        const named_dump_item* const items = named_dump_items[pack].first;
+        const size_t count = named_dump_items[pack].second;
+        for (unsigned int idx = 0; idx < count; idx++) {
+            if (stricmp(name, items[idx].name)) {
+                continue; // different name
+            }
+            it = items[idx].item;
+            it.pack_id = pack;
+            return true;
+        }
+    }
+    return false;
+}
+
+// Resolves `var_name` as a local variable.
+// Known variable: copies its item into `dst` and returns true.
+// Unknown variable: turns `dst` into a DIT_LOCAL_VARIABLE placeholder whose
+// name is copied into `pool`, and returns false.
+bool TFieldCalculatorBase::get_local_var(dump_item& dst, char* var_name) {
+    const TMap<const char*, dump_item>::const_iterator known = local_vars.find(var_name);
+    if (known != local_vars.end()) {
+        dst = known->second;
+        return true;
+    }
+
+    // New local variable
+    dst.type = DIT_LOCAL_VARIABLE;
+    dst.local_var_name = pool.append(var_name);
+    return false;
+}
+
+char* TFieldCalculatorBase::get_field(dump_item& dst, char* s) { // Parses one operand at `s` (keyword "name", quoted string, number, known field or local variable) into `dst`; returns the position after it. Throws yexception on malformed input.
+ if (!stricmp(s, "name")) { // matches only when the whole remaining string is "name"
+ dst.type = DIT_NAME;
+ return s + 4; // leave there 0
+ }
+
+ if (*s == '"' || *s == '\'') { // quoted string constant; the closing quote must match the opening one
+ char* end = strchr(s + 1, *s);
+ bool hasEsc = false;
+ while (end && end > s + 1 && end[-1] == '\\') { // a quote preceded by a backslash does not terminate the constant
+ end = strchr(end + 1, *s);
+ hasEsc = true;
+ }
+ if (!end)
+ ythrow yexception() << "calc-expr: unterminated string constant at " << s;
+ dst.type = DIT_STR_CONST;
+ dst.the_buf.assign(s + 1, end);
+ if (hasEsc) // unescape the quote character
+ SubstGlobal(dst.the_buf, *s == '"' ? "\\\"" : "\\'", *s == '"' ? "\"" : "'");
+ dst.set_arrind(0); // just in case
+ return end + 1;
+ }
+
+ bool is_number = isdigit((ui8)*s) || (*s == '+' || *s == '-') && isdigit((ui8)s[1]), is_float = false; // a digit, or a sign directly followed by a digit
+ char* end = FieldNameChars.cbrk(s + is_number); // `+ is_number` steps over a possible leading sign
+ if (is_number && *end == '.') { // fractional part => float constant
+ is_float = true;
+ end = FieldNameChars.cbrk(end + 1);
+ }
+ char* next = SpaceChars.cbrk(end);
+ int arr_index = 0;
+ bool has_arr_index = false;
+ if (*next == '[') { // optional "[index]" suffix
+ arr_index = atoi(next + 1);
+ has_arr_index = true;
+ next = strchr(next, ']');
+ if (!next)
+ ythrow yexception() << "calc-expr: No closing ']' for '" << s << "'";
+ next = SpaceChars.cbrk(next + 1);
+ }
+ char end_sav = *end; // temporarily NUL-terminate the token in place; restored below
+ *end = 0;
+
+ if (!item_by_name(dst, s)) { // not a known field: local variable or numeric constant
+ if (!is_number) {
+ get_local_var(dst, s);
+ } else if (is_float) {
+ dst = (float)strtod(s, nullptr);
+ } else
+ dst = strtol(s, nullptr, 10);
+
+ dst.pack_id = 0;
+ *end = end_sav;
+ return next;
+ }
+
+ // check array/not array
+ if (has_arr_index && !dst.is_array_field()) // NOTE(review): the throwing paths below leave *end overwritten with 0
+ ythrow yexception() << "calc-expr: field " << s << " is not an array";
+
+ //if (!has_arr_index && dst.is_array_field())
+ // yexception("calc-expr: field %s is array, index required", s);
+
+ if (has_arr_index && (arr_index < 0 || arr_index >= dst.arr_length))
+ ythrow yexception() << "calc-expr: array index [" << arr_index << "] is out of range for field " << s << " (length is " << dst.arr_length << ")";
+
+ *end = end_sav;
+ dst.set_arrind(arr_index); // without an explicit index this is element 0
+ return next;
+}
+
+// BEGIN Stack calculator functions
+// Advances past whitespace/control characters and round brackets, adjusting
+// `bracket_depth` (+1 for '(', -1 for ')'). Returns a pointer to the first
+// other character, or to the terminating NUL.
+inline char* skipspace(char* c, int& bracket_depth) {
+    for (;; ++c) {
+        const char ch = *c;
+        if (ch == '(') {
+            ++bracket_depth;
+        } else if (ch == ')') {
+            --bracket_depth;
+        } else if (ch == '\0' || (ui8)ch > ' ') {
+            return c;
+        }
+    }
+}
+
+void ensure_defined(const dump_item& item) {
+ if (item.type == DIT_LOCAL_VARIABLE) {
+ ythrow yexception() << "Usage of non-defined field or local variable '" << item.local_var_name << "'";
+ }
+}
+
+// Reduces two adjacent parser stack slots (`left`, `right`) using the
+// operator attached to `right`; the outcome replaces `left`.
+//  - Assignment: registers right's item as the value of the new local
+//    variable named by `left`. Only a not-yet-defined local variable may be
+//    assigned. When the value is a math result its operation is flagged
+//    is_variable, so it is evaluated once per record.
+//  - Any other operator: appends a calc_op to `ops` and makes `left` refer to
+//    it as a DIT_MATH_RESULT (by index; rewritten to a pointer in Compile).
+// Throws yexception on undefined operands or an invalid assignment.
+void TFieldCalculatorBase::emit_op(TVector<calc_op>& ops, calc_elem& left, calc_elem& right) {
+    int out_op = ops.size();
+    char oper = right.oper;
+    ensure_defined(right.item);
+    if (oper == OP_ASSIGN) {
+        if (left.item.type != DIT_LOCAL_VARIABLE) {
+            ythrow yexception() << "Assignment only to local variables is allowed";
+        }
+        if (local_vars.find(left.item.local_var_name) != local_vars.end()) {
+            ythrow yexception() << "Reassignment to the local variable " << left.item.local_var_name << " is not allowed";
+        }
+        local_vars[left.item.local_var_name] = right.item;
+        if (right.item.type == DIT_MATH_RESULT) {
+            // arr_ind indexes the vector the operation was appended to.
+            ops[right.item.arr_ind].is_variable = true;
+        }
+        // Take over the value only. `left` must keep its own operator and
+        // priority: copying them too turned e.g. `1 + (x = 2)` into a second
+        // assignment when the slot was reduced further.
+        left.item = right.item;
+    } else {
+        ensure_defined(left.item);
+        ops.push_back(calc_op(left, right));
+        left.item.type = DIT_MATH_RESULT;
+        left.item.arr_ind = out_op;
+    }
+}
+
+// Returns the binding priority of operator code `c` (a larger value binds
+// tighter), or 0 for anything that is not a known operator.
+inline int get_op_prio(char c) {
+    struct TOperPrio {
+        char Oper;
+        int Prio;
+    };
+    static const TOperPrio priorities[] = {
+        {OP_ASSIGN, 1},
+        {OP_QUESTION, 2},
+        {OP_COLON, 2},
+        {OP_LOGICAL_OR, 3},
+        {OP_LOGICAL_AND, 4},
+        {OP_BITWISE_OR, 5},
+        {OP_XOR, 6},
+        {OP_BITWISE_AND, 7},
+        {OP_EQUAL, 8},
+        {OP_NOT_EQUAL, 8},
+        {OP_LESS, 9},
+        {OP_LESS_OR_EQUAL, 9},
+        {OP_GREATER, 9},
+        {OP_GREATER_OR_EQUAL, 9},
+        {OP_LEFT_SHIFT, 10},
+        {OP_RIGHT_SHIFT, 10},
+        {OP_ADD, 11},
+        {OP_SUBSTRACT, 11},
+        {OP_MULTIPLY, 12},
+        {OP_DIVIDE, 12},
+        {OP_MODULUS, 12},
+        {OP_REGEXP, 13},
+        {OP_REGEXP_NOT, 13},
+        {OP_UNARY_NOT, 14},
+        {OP_UNARY_COMPLEMENT, 14},
+        {OP_UNARY_MINUS, 14},
+        {OP_LOG, 14},
+        {OP_LOG10, 14},
+        {OP_ROUND, 14},
+    };
+    for (const TOperPrio& entry : priorities) {
+        if (entry.Oper == c) {
+            return entry.Prio;
+        }
+    }
+    return 0;
+}
+
+// Reads one operator token at `c` and advances `c` past it.
+// `unary_op_near` decides whether '-' means unary minus or subtraction.
+// Returns OP_UNKNOWN for an unrecognized token; in that case only the first
+// character has been consumed.
+Operators get_oper(char*& c, bool unary_op_near) {
+    const char first = *c++;
+
+    // Consumes the next character when it equals `expected`.
+    const auto accept = [&c](char expected) {
+        if (*c != expected) {
+            return false;
+        }
+        c++;
+        return true;
+    };
+
+    switch (first) {
+        case '&':
+            return accept('&') ? OP_LOGICAL_AND : OP_BITWISE_AND;
+        case '|':
+            return accept('|') ? OP_LOGICAL_OR : OP_BITWISE_OR;
+        case '<':
+            if (accept('='))
+                return OP_LESS_OR_EQUAL;
+            if (accept('<'))
+                return OP_LEFT_SHIFT;
+            return OP_LESS;
+        case '>':
+            if (accept('='))
+                return OP_GREATER_OR_EQUAL;
+            if (accept('>'))
+                return OP_RIGHT_SHIFT;
+            return OP_GREATER;
+        case '!':
+            if (accept('='))
+                return OP_NOT_EQUAL;
+            if (accept('~'))
+                return OP_REGEXP_NOT;
+            return OP_UNARY_NOT;
+        case '=':
+            if (accept('='))
+                return OP_EQUAL;
+            if (accept('~'))
+                return OP_REGEXP;
+            return OP_ASSIGN;
+        case '-':
+            return unary_op_near ? OP_UNARY_MINUS : OP_SUBSTRACT;
+        case '#':
+            // Named unary functions are spelled #NAME#.
+            if (!strncmp(c, "LOG#", 4)) {
+                c += 4;
+                return OP_LOG;
+            }
+            if (!strncmp(c, "LOG10#", 6)) {
+                c += 6;
+                return OP_LOG10;
+            }
+            if (!strncmp(c, "ROUND#", 6)) {
+                c += 6;
+                return OP_ROUND;
+            }
+            return OP_UNKNOWN;
+        case '+':
+            return OP_ADD;
+        case '*':
+            return OP_MULTIPLY;
+        case '/':
+            return OP_DIVIDE;
+        case '%':
+            return OP_MODULUS;
+        case '^':
+            return OP_XOR;
+        case '~':
+            return OP_UNARY_COMPLEMENT;
+        case '?':
+            return OP_QUESTION;
+        case ':':
+            return OP_COLON;
+    }
+    return OP_UNKNOWN;
+}
+// END Stack calculator functions
+
+void TFieldCalculatorBase::Compile(char** field_names, int field_count) { // Parses each argument into a printout, a condition (leading '?') or a local-variable assignment. Throws yexception on syntax errors. The argument strings are temporarily modified in place by get_field().
+ out_el = 0, out_cond = 0;
+ autoarray<dump_item>(field_count).swap(printouts); // at most field_count printouts / conditions
+ autoarray<dump_item>(field_count).swap(conditions);
+ local_vars.clear();
+
+ // parse arguments into calculator's "pseudo-code"
+ for (int el = 0; el < field_count; el++) {
+ char* c = field_names[el];
+ bool is_expr = !!*MathOpChars.brk(c), is_cond = *c == '?'; // expression: contains any operator/bracket character
+ if (is_cond)
+ c++;
+ if (!is_expr && !is_cond) { // plain field/constant: no calculator needed
+ get_field(printouts[out_el], c);
+ ensure_defined(printouts[out_el]);
+ ++out_el;
+ continue;
+ } else { // Stack Calculator
+ const int maxstack = 64;
+ calc_elem fstack[maxstack]; // calculator's stack
+ int bdepth = 0; // brackets depth
+ int stack_cur = -1;
+ bool unary_op_near = false; // indicates that the next operator is unary
+ bool had_assignment_out_of_brackets = false;
+ int uop_seq = 0; // maintains right-to left order for unary operators
+ while (*(c = skipspace(c, bdepth))) { // each iteration consumes one [operator] operand pair
+ /** https://wiki.yandex.ru/JandeksPoisk/Antispam/OwnersData/attselect#calc */
+ //printf("1.%i c = '%s'\n", unary_op_near, c);
+ Operators cur_oper = OP_UNKNOWN;
+ int op_prio = 0;
+ if (stack_cur >= 0) { // every operand except the first is preceded by an operator
+ cur_oper = get_oper(c, unary_op_near);
+ op_prio = get_op_prio(cur_oper);
+ if (!op_prio)
+ ythrow yexception() << "calc-expr: Unsupported operator '" << c[-1] << "'";
+ op_prio += bdepth * 256 + uop_seq; // brackets outrank any operator priority
+ if (unary_op_near)
+ uop_seq += 20;
+ while (op_prio <= fstack[stack_cur].op_prio && stack_cur > 0) { // reduce everything that binds at least as tightly
+ emit_op(calc_ops, fstack[stack_cur - 1], fstack[stack_cur]);
+ stack_cur--;
+ }
+ }
+ //printf("2.%i c = '%s'\n", unary_op_near, c);
+ had_assignment_out_of_brackets |= (bdepth == 0 && cur_oper == OP_ASSIGN);
+ c = skipspace(c, bdepth);
+ unary_op_near = *c == '-' && !isdigit((ui8)c[1]) || *c == '~' || (*c == '!' && c[1] != '=') ||
+ !strncmp(c, "#LOG#", 5) || !strncmp(c, "#LOG10#", 7) || !strncmp(c, "#ROUND#", 7); // '-' directly followed by a digit is left for get_field() to parse as a negative number
+ if (!unary_op_near)
+ uop_seq = 0;
+ if (stack_cur >= maxstack - 1)
+ ythrow yexception() << "calc-expr: Math eval stack overflow!\n";
+ stack_cur++;
+ fstack[stack_cur].oper = cur_oper;
+ fstack[stack_cur].op_prio = op_prio;
+ //printf("3.%i c = '%s'\n", unary_op_near, c);
+ if (unary_op_near)
+ fstack[stack_cur].item = dump_item(); // placeholder left operand for the upcoming unary operator
+ else
+ c = get_field(fstack[stack_cur].item, c);
+ }
+ while (stack_cur > 0) { // reduce whatever is left on the stack
+ emit_op(calc_ops, fstack[stack_cur - 1], fstack[stack_cur]);
+ stack_cur--;
+ }
+ ensure_defined(fstack[0].item);
+ if (is_cond) {
+ if (had_assignment_out_of_brackets)
+ ythrow yexception() << "Assignment in condition. (Did you mean '==' instead of '='?)";
+ if (fstack[0].item.type != DIT_FAKE_ITEM) // Skip empty conditions: "?()".
+ conditions[out_cond++] = fstack[0].item;
+ } else if (!had_assignment_out_of_brackets) { // a top-level assignment only defines a variable and prints nothing
+ printouts[out_el++] = fstack[0].item;
+ }
+ }
+ }
+ // calc_ops will not grow any more, so arr_ind -> op
+ for (int n = 0; n < out_cond; n++)
+ conditions[n].rewrite_op(calc_ops.data());
+ for (int n = 0; n < out_el; n++)
+ printouts[n].rewrite_op(calc_ops.data());
+ for (auto& local_var : local_vars) {
+ local_var.second.rewrite_op(calc_ops.data());
+ }
+ for (int n = 0; n < (int)calc_ops.size(); n++) {
+ calc_ops[n].Left.rewrite_op(calc_ops.data());
+ calc_ops[n].Right.rewrite_op(calc_ops.data());
+ }
+}
+
+// Turns the operation index kept in arr_ind into a pointer into `ops`.
+// Only DIT_MATH_RESULT items refer to an operation; every other item is left untouched.
+void dump_item::rewrite_op(const calc_op* ops) {
+    if (type != DIT_MATH_RESULT)
+        return;
+    op = &ops[arr_ind];
+}
+
+// Clears the `calculated` flag of the operation behind every local variable that is a
+// math result. Cond() and CondById() call this before evaluating.
+void TFieldCalculatorBase::MarkLocalVarsAsUncalculated() {
+    for (auto& entry : local_vars) {
+        dump_item& var = entry.second;
+        if (var.type != DIT_MATH_RESULT)
+            continue; // plain items have no operation to reset
+        var.op->calculated = false;
+    }
+}
+
+// Evaluates all compiled conditions against the record(s) `d`.
+// Returns true only when every condition yields a non-zero value; evaluation stops at
+// the first condition that yields zero.
+bool TFieldCalculatorBase::Cond(const char** d) {
+    MarkLocalVarsAsUncalculated();
+    for (int idx = 0; idx < out_cond; ++idx) {
+        /** https://wiki.yandex.ru/JandeksPoisk/Antispam/OwnersData/attselect#conditions */
+        const eval_res_type value = conditions[idx].eval(d);
+        bool holds;
+        if (value.type == 0)
+            holds = value.res_ui32 != 0;
+        else if (value.type == 1)
+            holds = value.res_long != 0;
+        else
+            holds = value.res_dbl != 0; // any other tag is read as a double
+        if (!holds)
+            return false;
+    }
+    return true;
+}
+
+// Evaluates a single compiled condition against the record(s) `d`.
+//   condNumber - zero-based index of the condition.
+// Returns false when condNumber is outside [0, out_cond) or when the condition
+// evaluates to zero; true otherwise.
+bool TFieldCalculatorBase::CondById(const char** d, int condNumber) {
+    MarkLocalVarsAsUncalculated();
+    // A negative index used to fall through and read conditions[] out of bounds.
+    if (condNumber < 0 || condNumber >= out_cond)
+        return false;
+    const eval_res_type res = conditions[condNumber].eval(d);
+    switch (res.type) {
+        case 0:
+            return res.res_ui32 != 0;
+        case 1:
+            return res.res_long != 0;
+        default: // any other tag is read as a double, as before
+            return res.res_dbl != 0;
+    }
+}
+
+// Writes one line to `p`: every printout separated by tabs and terminated by a newline.
+//   d    - record pointer(s) the printouts are evaluated against
+//   Name - text emitted for DIT_NAME printouts
+// Math results are printed as %u / %ld / %f depending on the result tag; a result with
+// any other tag prints nothing. Everything else is delegated to dump_item::print.
+void TFieldCalculatorBase::Print(FILE* p, const char** d, const char* Name) {
+    for (int col = 0; col < out_el; ++col) {
+        switch (printouts[col].type) {
+            case DIT_NAME:
+                fprintf(p, "%s", Name);
+                break;
+            case DIT_MATH_RESULT: { // calculate
+                const eval_res_type value = printouts[col].eval(d);
+                if (value.type == 0)
+                    fprintf(p, "%u", value.res_ui32);
+                else if (value.type == 1)
+                    fprintf(p, "%ld", value.res_long);
+                else if (value.type == 2)
+                    fprintf(p, "%f", value.res_dbl);
+                break;
+            }
+            default:
+                printouts[col].print(p, d);
+                break;
+        }
+        const bool isLast = (col == out_el - 1);
+        fputs(isLast ? "\n" : "\t", p);
+    }
+}
+
+// Evaluates every printout that is a math result or a float field and stores the values
+// in `result`. `result` is cleared first; other printouts are skipped.
+void TFieldCalculatorBase::CalcAll(const char** d, TVector<float>& result) const {
+    result.clear();
+    for (int n = 0; n < out_el; ++n) {
+        if (printouts[n].type == DIT_MATH_RESULT || printouts[n].type == DIT_FLOAT_FIELD) {
+            const eval_res_type res = printouts[n].eval(d);
+            // Convert via operator double(), which honours the result tag. Reading
+            // res.res_dbl directly returned garbage for ui32/long results.
+            result.push_back(static_cast<float>(static_cast<double>(res)));
+        }
+    }
+}
+
+// Evaluates the first printout against an empty record and prints the value to stdout.
+// Throws yexception when nothing was compiled for printing.
+void TFieldCalculatorBase::SelfTest() {
+    if (out_el < 1)
+        ythrow yexception() << "Please specify conditions for test mode";
+    const char* emptyRecord = "";
+    const eval_res_type value = printouts[0].eval(&emptyRecord);
+    if (value.type == 0)
+        printf("%u\n", value.res_ui32);
+    else if (value.type == 1)
+        printf("%ld\n", value.res_long);
+    else if (value.type == 2)
+        printf("%f\n", value.res_dbl);
+}
+
+// Prints to stdout every field (and every array element) of the first registered item
+// table whose value differs between rec1 and rec2, as "\tname: old -> new".
+// The shared item is temporarily re-pointed at each array element via set_arrind(); the
+// original field_offset is restored after every element.
+void TFieldCalculatorBase::PrintDiff(const char* rec1, const char* rec2) {
+    const named_dump_item* const items = named_dump_items[0].first;
+    const size_t itemCount = named_dump_items[0].second;
+    for (size_t i = 0; i < itemCount; ++i) {
+        const dump_item& field = items[i].item;
+        if (!field.is_field())
+            continue; // not really a field
+        dump_item& mutableField = const_cast<dump_item&>(field);
+        const int elemCount = field.is_array_field() ? field.arr_length : 1;
+        for (int ind = 0; ind < elemCount; ++ind) {
+            const intptr_t savedOffset = field.field_offset;
+            mutableField.set_arrind(ind);
+            const bool same = field.eval(&rec1) == field.eval(&rec2);
+            if (!same) {
+                if (field.is_array_field())
+                    printf("\t%s[%i]: ", items[i].name, ind);
+                else
+                    printf("\t%s: ", items[i].name);
+                field.print(stdout, &rec1);
+                printf(" -> ");
+                field.print(stdout, &rec2);
+            }
+            mutableField.field_offset = savedOffset;
+        }
+    }
+}
+
+// Writes "name=value" (or "name[i]=value" for array elements) for every field of every
+// registered item table to `s`, with `delim` between entries.
+//   d - one record pointer per registered table, in registration order
+// The shared item is temporarily re-pointed at each array element via set_arrind(); the
+// original field_offset is restored after printing.
+void TFieldCalculatorBase::DumpAll(IOutputStream& s, const char** d, const TStringBuf& delim) {
+    bool needDelim = false;
+    for (size_t tableIdx = 0; tableIdx < named_dump_items.size(); ++tableIdx) {
+        const named_dump_item* const items = named_dump_items[tableIdx].first;
+        const size_t itemCount = named_dump_items[tableIdx].second;
+        const char* record = d[tableIdx];
+        for (size_t i = 0; i < itemCount; ++i) {
+            const dump_item& field = items[i].item;
+            if (!field.is_field())
+                continue;
+            const int elemCount = field.is_array_field() ? field.arr_length : 1;
+            for (int ind = 0; ind < elemCount; ++ind) {
+                if (needDelim)
+                    s << delim;
+                needDelim = true;
+                s << items[i].name;
+                if (field.is_array_field())
+                    Printf(s, "[%i]", ind);
+                s << "=";
+                const intptr_t savedOffset = field.field_offset;
+                const_cast<dump_item&>(field).set_arrind(ind);
+                field.print(&s, &record);
+                const_cast<dump_item&>(field).field_offset = savedOffset;
+            }
+        }
+    }
+}
diff --git a/library/cpp/fieldcalc/field_calc.h b/library/cpp/fieldcalc/field_calc.h
new file mode 100644
index 0000000000..46bf371a60
--- /dev/null
+++ b/library/cpp/fieldcalc/field_calc.h
@@ -0,0 +1,136 @@
+#pragma once
+
+#include <cstdio>
+
+#include <library/cpp/deprecated/autoarray/autoarray.h>
+#include <util/generic/map.h>
+#include <util/generic/vector.h>
+#include <util/memory/segmented_string_pool.h>
+
+struct dump_item; // one operand of the calculator; defined in field_calc_int.h
+struct calc_op; // a single calculator operation; instances are kept in TFieldCalculatorBase::calc_ops
+struct named_dump_item; // name + dump_item pair; defined in field_calc_int.h
+struct calc_elem; // NOTE(review): operand type taken by emit_op; its definition is not in this header
+class IOutputStream;
+
+template <class T>
+std::pair<const named_dump_item*, size_t> get_named_dump_items(); // (item table, item count) for record type T; only declared here - presumably specialized per record type, TODO confirm
+
+// Type-erased core of the field calculator. Records are passed as raw `const char*`
+// pointers (one per registered item table). The whole API is protected: it is meant to
+// be used through the typed wrappers TFieldCalculator / TFieldCalculator2.
+class TFieldCalculatorBase {
+private:
+    segmented_string_pool pool;
+    void emit_op(TVector<calc_op>& ops, calc_elem& left, calc_elem& right);
+    // Resets the `calculated` flag of math-result local variables.
+    void MarkLocalVarsAsUncalculated();
+
+protected:
+    // Items written by Print(); the first out_el entries are in use.
+    autoarray<dump_item> printouts;
+    // Items checked by Cond()/CondById(); the first out_cond entries are in use.
+    autoarray<dump_item> conditions;
+    int out_el;
+    int out_cond;
+    TVector<calc_op> calc_ops; // operands for calculator, indexed by arr_ind for DIT_math_result
+
+    // One (item table, item count) pair per record type, in registration order.
+    TVector<std::pair<const named_dump_item*, size_t>> named_dump_items;
+    // NOTE(review): keyed by `const char*`, so lookup compares pointers, not text -
+    // presumably the names are interned in `pool`; confirm.
+    TMap<const char*, dump_item> local_vars;
+
+    char* get_field(dump_item& dst, char* s);
+    bool get_local_var(dump_item& dst, char* s);
+    virtual bool item_by_name(dump_item& it, const char* name) const;
+
+    TFieldCalculatorBase();
+    virtual ~TFieldCalculatorBase();
+
+    // True when every compiled condition is non-zero for the record(s) `d`.
+    bool Cond(const char** d);
+    // Same as Cond() for the single condition number `condNumber`.
+    bool CondById(const char** d, int condNumber);
+    // Prints all printouts as one tab-separated line to `p`.
+    void Print(FILE* p, const char** d, const char* Name);
+    void Compile(char** field_names, int field_count);
+    // Evaluates the first printout on an empty record and prints it to stdout.
+    void SelfTest();
+    // Prints the fields that differ between two records to stdout.
+    void PrintDiff(const char* d1, const char* d2);
+    // Clears `result`, then stores math-result / float-field printout values in it.
+    void CalcAll(const char** d, TVector<float>& result) const;
+    // Writes "name=value" for every field to `s`, separated by `delim`.
+    void DumpAll(IOutputStream& s, const char** d, const TStringBuf& delim);
+};
+
+// Typed front end of TFieldCalculatorBase for records of type T. Every method passes
+// the address of the record, viewed as `const char*`, to the base-class method of the
+// same name.
+template <class T>
+class TFieldCalculator: protected TFieldCalculatorBase {
+public:
+    TFieldCalculator() {
+        named_dump_items.push_back(get_named_dump_items<T>());
+    }
+
+    ~TFieldCalculator() override = default;
+
+    bool Cond(const T& d) {
+        const char* rec = reinterpret_cast<const char*>(&d);
+        return TFieldCalculatorBase::Cond(&rec);
+    }
+
+    bool CondById(const T& d, int condNumber) {
+        const char* rec = reinterpret_cast<const char*>(&d);
+        return TFieldCalculatorBase::CondById(&rec, condNumber);
+    }
+
+    // Prints to stdout.
+    void Print(const T& d, const char* Name) {
+        const char* rec = reinterpret_cast<const char*>(&d);
+        TFieldCalculatorBase::Print(stdout, &rec, Name);
+    }
+
+    void Print(FILE* p, const T& d, const char* Name) {
+        const char* rec = reinterpret_cast<const char*>(&d);
+        TFieldCalculatorBase::Print(p, &rec, Name);
+    }
+
+    size_t Compile(char** field_names, int field_count) {
+        TFieldCalculatorBase::Compile(field_names, field_count);
+        return out_el; // number of fields printed
+    }
+
+    void SelfTest() {
+        TFieldCalculatorBase::SelfTest();
+    }
+
+    void PrintDiff(const T& d1, const T& d2) {
+        const char* first = reinterpret_cast<const char*>(&d1);
+        const char* second = reinterpret_cast<const char*>(&d2);
+        TFieldCalculatorBase::PrintDiff(first, second);
+    }
+
+    // `result` is cleared by the base implementation before values are stored.
+    void CalcAll(const T& d, TVector<float>& result) const {
+        const char* rec = reinterpret_cast<const char*>(&d);
+        TFieldCalculatorBase::CalcAll(&rec, result);
+    }
+
+    // Writes to `s` without clearing or resetting it.
+    void DumpAll(IOutputStream& s, const T& d, const TStringBuf& delim) {
+        const char* rec = reinterpret_cast<const char*>(&d);
+        TFieldCalculatorBase::DumpAll(s, &rec, delim);
+    }
+};
+
+// Calculator over a pair of records (T, T2). T's item table is registered by the base
+// class and T2's is appended here, so record pointers are passed as {&d, &d2}.
+template <class T, class T2>
+class TFieldCalculator2: protected TFieldCalculator<T> {
+public:
+    TFieldCalculator2() {
+        this->named_dump_items.push_back(get_named_dump_items<T2>());
+    }
+
+    ~TFieldCalculator2() override = default;
+
+    bool Cond(const T& d, const T2& d2) {
+        const char* recs[2] = {reinterpret_cast<const char*>(&d), reinterpret_cast<const char*>(&d2)};
+        return TFieldCalculatorBase::Cond(recs);
+    }
+
+    bool CondById(const T& d, const T2& d2, int condNumber) {
+        const char* recs[2] = {reinterpret_cast<const char*>(&d), reinterpret_cast<const char*>(&d2)};
+        return TFieldCalculatorBase::CondById(recs, condNumber);
+    }
+
+    // Prints to stdout.
+    void Print(const T& d, const T2& d2, const char* Name) {
+        const char* recs[2] = {reinterpret_cast<const char*>(&d), reinterpret_cast<const char*>(&d2)};
+        TFieldCalculatorBase::Print(stdout, recs, Name);
+    }
+
+    void Print(FILE* p, const T& d, const T2& d2, const char* Name) {
+        const char* recs[2] = {reinterpret_cast<const char*>(&d), reinterpret_cast<const char*>(&d2)};
+        TFieldCalculatorBase::Print(p, recs, Name);
+    }
+
+    // Returns what TFieldCalculator<T>::Compile returns.
+    size_t Compile(char** field_names, int field_count) {
+        return TFieldCalculator<T>::Compile(field_names, field_count);
+    }
+};
diff --git a/library/cpp/fieldcalc/field_calc_int.h b/library/cpp/fieldcalc/field_calc_int.h
new file mode 100644
index 0000000000..5f71fafbda
--- /dev/null
+++ b/library/cpp/fieldcalc/field_calc_int.h
@@ -0,0 +1,593 @@
+#pragma once
+
+#include <cmath>
+
+#include <util/system/defaults.h>
+#include <util/system/yassert.h>
+#include <util/memory/alloc.h>
+#include <util/generic/yexception.h>
+
+#include "lossy_types.h"
+#include "field_calc.h"
+
+// eval_res_type
+// Tagged result of an expression evaluation. `type` selects the active union member:
+//   0 - res_ui32, 1 - res_long, 2 - res_dbl, 3 - null (no value; used by the ternary operator).
+struct eval_res_type {
+    union {
+        ui32 res_ui32;
+        long res_long;
+        double res_dbl;
+    };
+    int type;
+    eval_res_type(ui32 v)
+        : res_ui32(v)
+        , type(0)
+    {
+    }
+    eval_res_type(long v)
+        : res_long(v)
+        , type(1)
+    {
+    }
+    // bool results are stored as long (0 or 1)
+    eval_res_type(bool v)
+        : res_long(v)
+        , type(1)
+    {
+    }
+    eval_res_type(double v)
+        : res_dbl(v)
+        , type(2)
+    {
+    }
+    // a special null value for ternary operator
+    // The payload is zeroed: code that does not check is_null() (e.g. truthiness tests
+    // that read res_dbl for every tag other than 0/1) must not see indeterminate memory.
+    explicit eval_res_type()
+        : res_dbl(0.)
+        , type(3)
+    {
+    }
+    operator ui32() const;
+    operator long() const;
+    operator double() const;
+    // Converts the stored value to long in place and sets the tag to 1.
+    void to_long();
+    bool is_null() const;
+};
+
+inline bool eval_res_type::is_null() const { // true only for values made by the default (null) constructor, which sets type 3
+ return type == 3;
+}
+
+// Converts the held value to long in place. ui32 is widened, double is truncated by the
+// cast; for any other tag only the tag itself is changed.
+inline void eval_res_type::to_long() {
+    switch (type) {
+        case 0:
+            res_long = res_ui32;
+            break;
+        case 2:
+            res_long = (long)res_dbl;
+            break;
+        default:
+            break;
+    }
+    type = 1;
+}
+
+inline eval_res_type::operator ui32() const { // valid only for ui32 values (type 0); asserted, not converted
+ assert(type == 0);
+ return res_ui32;
+}
+
+// Integer view of the value: valid for ui32 and long values only (asserted).
+inline eval_res_type::operator long() const {
+    assert(type == 0 || type == 1);
+    if (type == 1)
+        return res_long;
+    return res_ui32;
+}
+
+// Floating-point view of the value; long and ui32 values are converted, any tag other
+// than 1 or 2 is read as ui32.
+inline eval_res_type::operator double() const {
+    if (type == 2)
+        return res_dbl;
+    if (type == 1)
+        return (double)res_long;
+    return (double)res_ui32;
+}
+
+// Sum computed in the wider of the two operand types (ui32 < long < double).
+inline eval_res_type operator+(const eval_res_type& a, const eval_res_type& b) {
+    const int widest = std::max(a.type, b.type);
+    if (widest == 0)
+        return (ui32)a + (ui32)b;
+    if (widest == 1)
+        return (long)a + (long)b;
+    /* 2 and anything else */
+    return (double)a + (double)b;
+}
+
+// Difference; integer operands are always subtracted as long (so ui32 - ui32 can be
+// negative), everything else as double.
+inline eval_res_type operator-(const eval_res_type& a, const eval_res_type& b) {
+    const int widest = std::max(a.type, b.type);
+    if (widest == 0 || widest == 1)
+        return (long)a - (long)b;
+    return (double)a - (double)b;
+}
+
+// Unary minus. A ui32 operand is negated as long; other tags are negated in place type.
+inline eval_res_type Minus(const eval_res_type& a) {
+    if (a.type == 0)
+        return -(long)a.res_ui32;
+    if (a.type == 1)
+        return -a.res_long;
+    return -a.res_dbl;
+}
+
+// Natural logarithm of the value; the result is always a double.
+inline eval_res_type Log(const eval_res_type& a) {
+    if (a.type == 0)
+        return log(a.res_ui32);
+    if (a.type == 1)
+        return log(a.res_long);
+    return log(a.res_dbl);
+}
+
+// Base-10 logarithm of the value; the result is always a double.
+inline eval_res_type Log10(const eval_res_type& a) {
+    if (a.type == 0)
+        return log10(a.res_ui32);
+    if (a.type == 1)
+        return log10(a.res_long);
+    return log10(a.res_dbl);
+}
+
+// Rounds a double with round(); integer values are returned unchanged with their type.
+inline eval_res_type Round(const eval_res_type& a) {
+    if (a.type == 0)
+        return a.res_ui32;
+    if (a.type == 1)
+        return a.res_long;
+    return round(a.res_dbl);
+}
+
+// Equality, compared in the wider of the two operand types.
+inline bool operator==(const eval_res_type& a, const eval_res_type& b) {
+    const int widest = std::max(a.type, b.type);
+    if (widest == 0)
+        return (ui32)a == (ui32)b;
+    if (widest == 1)
+        return (long)a == (long)b;
+    return (double)a == (double)b;
+}
+
+// Less-than, compared in the wider of the two operand types.
+inline bool operator<(const eval_res_type& a, const eval_res_type& b) {
+    const int widest = std::max(a.type, b.type);
+    if (widest == 0)
+        return (ui32)a < (ui32)b;
+    if (widest == 1)
+        return (long)a < (long)b;
+    return (double)a < (double)b;
+}
+
+// Product computed in the wider of the two operand types.
+inline eval_res_type operator*(const eval_res_type& a, const eval_res_type& b) {
+    const int widest = std::max(a.type, b.type);
+    if (widest == 0)
+        return (ui32)a * (ui32)b;
+    if (widest == 1)
+        return (long)a * (long)b;
+    return (double)a * (double)b;
+}
+
+// Division, always performed in double. 0 / 0 yields 0; any other division by zero
+// throws yexception.
+inline double operator/(const eval_res_type& a, const eval_res_type& b) {
+    const double num = a;
+    const double den = b;
+    if (den != 0)
+        return num / den;
+    if (num == 0)
+        return 0.; // assume that a should be 0
+    ythrow yexception() << "Division by zero"; // TODO: show parameter names
+}
+
+// dump_item
+enum EDumpItemType { // discriminator for dump_item: selects which union member / constructor applies
+ DIT_FAKE_ITEM, // fake item - value never used
+ DIT_MATH_RESULT, // eval result
+ DIT_NAME, // printed as the `Name` argument of Print()
+
+ DIT_FIELDS_START, // Start of item types for real fields
+
+ DIT_BOOL_FIELD,
+ DIT_UI8_FIELD,
+ DIT_UI16_FIELD,
+ DIT_UI32_FIELD,
+ DIT_I64_FIELD,
+ DIT_UI64_FIELD,
+ DIT_FLOAT_FIELD,
+ DIT_DOUBLE_FIELD,
+ DIT_TIME_T32_FIELD,
+ DIT_PF16UI32_FIELD,
+ DIT_PF16FLOAT_FIELD,
+ DIT_SF16FLOAT_FIELD,
+ DIT_STRING_FIELD, // new
+
+ DIT_FIELDS_END, // End of item types for real fields
+
+ DIT_LONG_CONST,
+ DIT_FLOAT_CONST,
+ DIT_STR_CONST,
+
+ DIT_INT_FUNCTION, // first value of the range tested by dump_item::is_accessor_func()
+ DIT_FLOAT_FUNCTION,
+ DIT_BOOL_FUNCTION,
+ DIT_STR_FUNCTION, // new
+ DIT_STRBUF_FUNCTION, // new
+
+ DIT_UI8_EXT_FUNCTION,
+ DIT_UI16_EXT_FUNCTION,
+ DIT_UI32_EXT_FUNCTION,
+ DIT_UI64_EXT_FUNCTION, // last value of the range tested by dump_item::is_accessor_func()
+
+ DIT_UI8_ENUM_EQ, // *_ENUM_EQ / *_ENUM_SET: chosen by the `bitset` constructor flag (false / true)
+ DIT_UI8_ENUM_SET,
+ DIT_UI16_ENUM_EQ,
+ DIT_UI16_ENUM_SET,
+ DIT_UI32_ENUM_EQ,
+ DIT_UI32_ENUM_SET,
+ DIT_INT_ENUM_FUNCTION_EQ,
+ DIT_INT_ENUM_FUNCTION_SET,
+
+ DIT_BOOL_FUNC_FIXED_STR, // *_FUNC_FIXED_STR: member function taking a TStringBuf; the string is kept in the_buf
+ DIT_UI8_FUNC_FIXED_STR,
+ DIT_UI16_FUNC_FIXED_STR,
+ DIT_UI32_FUNC_FIXED_STR,
+ DIT_I64_FUNC_FIXED_STR,
+ DIT_UI64_FUNC_FIXED_STR,
+ DIT_FLOAT_FUNC_FIXED_STR,
+ DIT_DOUBLE_FUNC_FIXED_STR,
+
+ DIT_RESOLVE_BY_NAME, //new - for external functions
+
+ DIT_LOCAL_VARIABLE
+};
+
+// True for the item types that are treated as strings: string fields, string constants,
+// both string-function flavours and resolve-by-name items.
+inline bool IsStringType(EDumpItemType type) {
+    switch (type) {
+        case DIT_STRING_FIELD:
+        case DIT_STR_CONST:
+        case DIT_STR_FUNCTION:
+        case DIT_STRBUF_FUNCTION:
+        case DIT_RESOLVE_BY_NAME:
+            return true;
+        default:
+            return false;
+    }
+}
+
+struct fake {}; // empty class used only to spell the member-function-pointer types below; presumably real accessors are cast to these - TODO confirm
+
+struct calc_op;
+
+typedef int (fake::*int_fn_t)() const; // no-argument const accessors, one typedef per return type
+typedef float (fake::*float_fn_t)() const;
+typedef bool (fake::*bool_fn_t)() const;
+typedef ui16 (fake::*ui16_fn_t)() const;
+typedef ui32 (fake::*ui32_fn_t)() const;
+typedef bool (fake::*bool_strbuf_fn_t)(const TStringBuf&) const; // string -> bool
+typedef ui8 (fake::*ui8_strbuf_fn_t)(const TStringBuf&) const; // string -> ui8
+typedef ui16 (fake::*ui16_strbuf_fn_t)(const TStringBuf&) const; // string -> ui16
+typedef ui32 (fake::*ui32_strbuf_fn_t)(const TStringBuf&) const; // string -> ui32
+typedef i64 (fake::*i64_strbuf_fn_t)(const TStringBuf&) const; // string -> i64
+typedef ui64 (fake::*ui64_strbuf_fn_t)(const TStringBuf&) const; // string -> ui64
+typedef float (fake::*float_strbuf_fn_t)(const TStringBuf&) const; // string -> float
+typedef double (fake::*double_strbuf_fn_t)(const TStringBuf&) const; // string -> double
+typedef const char* (fake::*str_fn_t)() const; // accessor returning a C string
+typedef const char* (fake::*strbuf_2_fn_t)(TString& buf, const char* nul) const; // string accessor given a caller buffer; NOTE(review): meaning of `nul` is not visible here
+typedef TStringBuf (fake::*resolve_fn_t)(const TStringBuf&) const; // string -> string, $var -> "value"
+
+// note: we can not reuse the above signatures, calling conventions may differ
+typedef ui8 (*ui8_ext_fn_t)(const fake*); // free functions taking the record pointer explicitly
+typedef ui16 (*ui16_ext_fn_t)(const fake*);
+typedef ui32 (*ui32_ext_fn_t)(const fake*);
+typedef ui64 (*ui64_ext_fn_t)(const fake*);
+
+// One operand of the field calculator: a record field, a constant, an accessor
+// function, an enum test or the result of a calculator operation. `type` tells which
+// member of the anonymous union is meaningful. Every converting constructor below picks
+// `type` from the static type of its argument.
+//
+// enum_val, arr_ind and arr_length now have default initializers: most constructors do
+// not set them, yet is_array_field() reads arr_length for accessor functions and the
+// whole object is copied around by value.
+struct dump_item {
+    EDumpItemType type;
+    int pack_id = 0;
+
+    union {
+        // fields
+        intptr_t field_offset;
+
+        // constants
+        long long_const;
+        float float_const;
+
+        // functions
+        int_fn_t int_fn;
+        float_fn_t float_fn;
+        bool_fn_t bool_fn;
+        str_fn_t str_fn;
+        strbuf_2_fn_t strbuf_2_fn;
+        resolve_fn_t resolve_fn;
+
+        bool_strbuf_fn_t bool_strbuf_fn;
+        ui8_strbuf_fn_t ui8_strbuf_fn;
+        ui16_strbuf_fn_t ui16_strbuf_fn;
+        ui32_strbuf_fn_t ui32_strbuf_fn;
+        i64_strbuf_fn_t i64_strbuf_fn;
+        ui64_strbuf_fn_t ui64_strbuf_fn;
+        float_strbuf_fn_t float_strbuf_fn;
+        double_strbuf_fn_t double_strbuf_fn;
+
+        ui8_ext_fn_t ui8_ext_fn;
+        ui16_ext_fn_t ui16_ext_fn;
+        ui32_ext_fn_t ui32_ext_fn;
+        ui64_ext_fn_t ui64_ext_fn;
+
+        // enum
+        int_fn_t int_enum_fn;
+
+        // for DIT_MATH_RESULT
+        const calc_op* op;
+    };
+
+    // for enum
+    ui32 enum_val = 0;
+
+    // for local vars, also used to mark accessor functions to use them in dump
+    const char* local_var_name = nullptr;
+
+    int arr_ind = 0; // externally initialized! (0 is only a safe default)
+    int arr_length = 0; // > 0 marks an array field
+
+    mutable TString the_buf; // buffer for string function, string constants also here
+
+    // Ctors
+    dump_item()
+        : type(DIT_FAKE_ITEM), field_offset(0) {}
+
+    // Plain fields: `ptr` is stored as an integer in field_offset, `arrlen` in arr_length.
+    dump_item(bool* ptr, int arrlen = 0)
+        : type(DIT_BOOL_FIELD), field_offset(reinterpret_cast<intptr_t>(ptr)), arr_length(arrlen) {}
+    dump_item(ui8* ptr, int arrlen = 0)
+        : type(DIT_UI8_FIELD), field_offset(reinterpret_cast<intptr_t>(ptr)), arr_length(arrlen) {}
+    dump_item(ui16* ptr, int arrlen = 0)
+        : type(DIT_UI16_FIELD), field_offset(reinterpret_cast<intptr_t>(ptr)), arr_length(arrlen) {}
+    dump_item(ui32* ptr, int arrlen = 0)
+        : type(DIT_UI32_FIELD), field_offset(reinterpret_cast<intptr_t>(ptr)), arr_length(arrlen) {}
+    dump_item(i64* ptr, int arrlen = 0)
+        : type(DIT_I64_FIELD), field_offset(reinterpret_cast<intptr_t>(ptr)), arr_length(arrlen) {}
+    dump_item(ui64* ptr, int arrlen = 0)
+        : type(DIT_UI64_FIELD), field_offset(reinterpret_cast<intptr_t>(ptr)), arr_length(arrlen) {}
+    dump_item(float* ptr, int arrlen = 0)
+        : type(DIT_FLOAT_FIELD), field_offset(reinterpret_cast<intptr_t>(ptr)), arr_length(arrlen) {}
+    dump_item(double* ptr, int arrlen = 0)
+        : type(DIT_DOUBLE_FIELD), field_offset(reinterpret_cast<intptr_t>(ptr)), arr_length(arrlen) {}
+    dump_item(time_t32* ptr, int arrlen = 0)
+        : type(DIT_TIME_T32_FIELD), field_offset(reinterpret_cast<intptr_t>(ptr)), arr_length(arrlen) {}
+    dump_item(pf16ui32* ptr, int arrlen = 0)
+        : type(DIT_PF16UI32_FIELD), field_offset(reinterpret_cast<intptr_t>(ptr)), arr_length(arrlen) {}
+    dump_item(pf16float* ptr, int arrlen = 0)
+        : type(DIT_PF16FLOAT_FIELD), field_offset(reinterpret_cast<intptr_t>(ptr)), arr_length(arrlen) {}
+    dump_item(sf16float* ptr, int arrlen = 0)
+        : type(DIT_SF16FLOAT_FIELD), field_offset(reinterpret_cast<intptr_t>(ptr)), arr_length(arrlen) {}
+    dump_item(char* ptr, int arrlen = 0)
+        : type(DIT_STRING_FIELD), field_offset(reinterpret_cast<intptr_t>(ptr)), arr_length(arrlen) {}
+
+    // Constants; a string constant is copied into the_buf.
+    dump_item(long val)
+        : type(DIT_LONG_CONST), long_const(val) {}
+    dump_item(float val)
+        : type(DIT_FLOAT_CONST), float_const(val) {}
+    dump_item(TString& val)
+        : type(DIT_STR_CONST), the_buf(val) {}
+
+    // Member-function accessors without arguments.
+    dump_item(int_fn_t fn)
+        : type(DIT_INT_FUNCTION), int_fn(fn) {}
+    dump_item(float_fn_t fn)
+        : type(DIT_FLOAT_FUNCTION), float_fn(fn) {}
+    dump_item(bool_fn_t fn)
+        : type(DIT_BOOL_FUNCTION), bool_fn(fn) {}
+
+    // Member functions taking a fixed string; `name` is copied into the_buf.
+    dump_item(bool_strbuf_fn_t fn, const char* name)
+        : type(DIT_BOOL_FUNC_FIXED_STR), bool_strbuf_fn(fn), the_buf(name) {}
+    dump_item(ui8_strbuf_fn_t fn, const char* name)
+        : type(DIT_UI8_FUNC_FIXED_STR), ui8_strbuf_fn(fn), the_buf(name) {}
+    dump_item(ui16_strbuf_fn_t fn, const char* name)
+        : type(DIT_UI16_FUNC_FIXED_STR), ui16_strbuf_fn(fn), the_buf(name) {}
+    dump_item(ui32_strbuf_fn_t fn, const char* name)
+        : type(DIT_UI32_FUNC_FIXED_STR), ui32_strbuf_fn(fn), the_buf(name) {}
+    dump_item(i64_strbuf_fn_t fn, const char* name)
+        : type(DIT_I64_FUNC_FIXED_STR), i64_strbuf_fn(fn), the_buf(name) {}
+    dump_item(ui64_strbuf_fn_t fn, const char* name)
+        : type(DIT_UI64_FUNC_FIXED_STR), ui64_strbuf_fn(fn), the_buf(name) {}
+    dump_item(float_strbuf_fn_t fn, const char* name)
+        : type(DIT_FLOAT_FUNC_FIXED_STR), float_strbuf_fn(fn), the_buf(name) {}
+    dump_item(double_strbuf_fn_t fn, const char* name)
+        : type(DIT_DOUBLE_FUNC_FIXED_STR), double_strbuf_fn(fn), the_buf(name) {}
+
+    // String-returning member functions.
+    dump_item(str_fn_t fn)
+        : type(DIT_STR_FUNCTION), str_fn(fn) {}
+    dump_item(strbuf_2_fn_t fn)
+        : type(DIT_STRBUF_FUNCTION), strbuf_2_fn(fn) {}
+
+    // External (free) accessor functions; a non-null `lvn` makes is_accessor_func() true.
+    dump_item(ui8_ext_fn_t fn, const char* lvn = nullptr)
+        : type(DIT_UI8_EXT_FUNCTION), ui8_ext_fn(fn), local_var_name(lvn) {}
+    dump_item(ui16_ext_fn_t fn, const char* lvn = nullptr)
+        : type(DIT_UI16_EXT_FUNCTION), ui16_ext_fn(fn), local_var_name(lvn) {}
+    dump_item(ui32_ext_fn_t fn, const char* lvn = nullptr)
+        : type(DIT_UI32_EXT_FUNCTION), ui32_ext_fn(fn), local_var_name(lvn) {}
+    dump_item(ui64_ext_fn_t fn, const char* lvn = nullptr)
+        : type(DIT_UI64_EXT_FUNCTION), ui64_ext_fn(fn), local_var_name(lvn) {}
+
+    // Enum tests against `val`: `bitset` selects the *_ENUM_SET type, otherwise *_ENUM_EQ.
+    dump_item(ui8* ptr, ui32 val, bool bitset)
+        : type(bitset ? DIT_UI8_ENUM_SET : DIT_UI8_ENUM_EQ), field_offset(reinterpret_cast<intptr_t>(ptr)), enum_val(val) {}
+    dump_item(ui16* ptr, ui32 val, bool bitset)
+        : type(bitset ? DIT_UI16_ENUM_SET : DIT_UI16_ENUM_EQ), field_offset(reinterpret_cast<intptr_t>(ptr)), enum_val(val) {}
+    dump_item(ui32* ptr, ui32 val, bool bitset)
+        : type(bitset ? DIT_UI32_ENUM_SET : DIT_UI32_ENUM_EQ), field_offset(reinterpret_cast<intptr_t>(ptr)), enum_val(val) {}
+    dump_item(int_fn_t fn, ui32 val, bool bitset)
+        : type(bitset ? DIT_INT_ENUM_FUNCTION_SET : DIT_INT_ENUM_FUNCTION_EQ), int_enum_fn(fn), enum_val(val) {}
+
+    // name of variable saved in the_buf
+    dump_item(resolve_fn_t fn, const char* name)
+        : type(DIT_RESOLVE_BY_NAME), resolve_fn(fn), the_buf(name) {}
+
+    // Functions
+    template <class TOut> // implemented for FILE*, TString* (appends) and IOutputStream*
+    void print(TOut* p, const char** dd) const;
+    TStringBuf GetStrBuf(const char** dd) const; // for char-types only!
+    eval_res_type eval(const char** dd) const;
+    void set_arrind(int arrind);
+    // For DIT_MATH_RESULT: replaces the index arr_ind by the pointer ops + arr_ind.
+    void rewrite_op(const calc_op* ops);
+
+    // A function item (DIT_INT_FUNCTION..DIT_UI64_EXT_FUNCTION) that carries a name.
+    bool is_accessor_func() const {
+        return type >= DIT_INT_FUNCTION && type <= DIT_UI64_EXT_FUNCTION && local_var_name;
+    }
+
+    // A real record field, or an accessor function that stands in for one.
+    bool is_field() const {
+        return (type > DIT_FIELDS_START && type < DIT_FIELDS_END) || is_accessor_func();
+    }
+
+    bool is_array_field() const {
+        return is_field() && arr_length > 0;
+    }
+};
+
+// named_dump_item
+struct named_dump_item { // one entry of the tables returned by get_named_dump_items<T>()
+ const char* name; // printed as the field name by PrintDiff / DumpAll
+ dump_item item;
+};
diff --git a/library/cpp/fieldcalc/lossy_types.h b/library/cpp/fieldcalc/lossy_types.h
new file mode 100644
index 0000000000..98acfea902
--- /dev/null
+++ b/library/cpp/fieldcalc/lossy_types.h
@@ -0,0 +1,52 @@
+#pragma once
+
+#include <util/generic/cast.h>
+
+// although target value is float, this thing is only used as unsigned int container
+// Stores an unsigned integer lossily in 16 bits: the value is converted to float and
+// bits 15..30 of its bit pattern are kept.
+struct pf16ui32 {
+    ui16 val;
+    pf16ui32()
+        : val(0)
+    {
+    }
+    void operator=(ui32 t) {
+        const float asFloat = static_cast<float>(t);
+        const ui32 bits = BitCast<ui32>(asFloat);
+        val = static_cast<ui16>(bits >> 15);
+    }
+    operator ui32() const {
+        const ui32 bits = (ui32)(val << 15);
+        return (ui32)BitCast<float>(bits);
+    }
+};
+
+// unsigned float value
+// Stores a non-negative float lossily in 16 bits (bits 15..30 of its bit pattern).
+struct pf16float {
+    ui16 val;
+    pf16float()
+        : val(0)
+    {
+    }
+    void operator=(float t) {
+        assert(t >= 0.);
+        const ui32 bits = BitCast<ui32>(t);
+        val = static_cast<ui16>(bits >> 15);
+    }
+    operator float() const {
+        const ui32 bits = (ui32)(val << 15);
+        return BitCast<float>(bits);
+    }
+};
+
+// signed float value
+// Stores a float lossily in 16 bits by keeping the upper half of its 32-bit pattern,
+// which includes the sign bit, so negative values round-trip with their sign.
+struct sf16float {
+    ui16 val;
+    sf16float()
+        : val(0)
+    {
+    }
+    void operator=(float t) {
+        // No `t >= 0` assertion here: it was copied from pf16float and wrongly rejected
+        // negative values in the signed type.
+        val = static_cast<ui16>(BitCast<ui32>(t) >> 16);
+    }
+    operator float() const {
+        // Widen before shifting: `val << 16` would shift a promoted int into its sign bit
+        // whenever the stored value is negative.
+        return BitCast<float>(static_cast<ui32>(val) << 16);
+    }
+};
+
+typedef i32 time_t32; // not really lossy, should be placed somewhere else; distinct name lets dump_item(time_t32*) be declared alongside the other field constructors
diff --git a/library/cpp/fieldcalc/ya.make b/library/cpp/fieldcalc/ya.make
new file mode 100644
index 0000000000..9796592996
--- /dev/null
+++ b/library/cpp/fieldcalc/ya.make
@@ -0,0 +1,13 @@
+LIBRARY() # fieldcalc: field_calc.cpp plus the headers listed in SRCS
+
+PEERDIR(
+ library/cpp/deprecated/autoarray
+)
+
+SRCS(
+ field_calc.cpp
+ lossy_types.h
+ field_calc_int.h
+)
+
+END()
diff --git a/library/cpp/malloc/galloc/malloc-info.cpp b/library/cpp/malloc/galloc/malloc-info.cpp
new file mode 100644
index 0000000000..fbcfa7ee06
--- /dev/null
+++ b/library/cpp/malloc/galloc/malloc-info.cpp
@@ -0,0 +1,9 @@
+#include <library/cpp/malloc/api/malloc.h>
+
+using namespace NMalloc;
+
+// Describes the allocator linked in by this module: only the name is filled in.
+TMallocInfo NMalloc::MallocInfo() {
+    TMallocInfo info;
+    info.Name = "tcmalloc";
+    return info;
+}
diff --git a/library/cpp/malloc/galloc/ya.make b/library/cpp/malloc/galloc/ya.make
new file mode 100644
index 0000000000..b6646a6cf6
--- /dev/null
+++ b/library/cpp/malloc/galloc/ya.make
@@ -0,0 +1,15 @@
+LIBRARY() # allocator module: supplies NMalloc::MallocInfo() from malloc-info.cpp
+
+NO_UTIL()
+ALLOCATOR_IMPL()
+
+PEERDIR(
+ library/cpp/malloc/api
+ contrib/deprecated/galloc
+)
+
+SRCS(
+ malloc-info.cpp
+)
+
+END()
diff --git a/library/cpp/on_disk/multi_blob/multiblob.cpp b/library/cpp/on_disk/multi_blob/multiblob.cpp
new file mode 100644
index 0000000000..d92b31e613
--- /dev/null
+++ b/library/cpp/on_disk/multi_blob/multiblob.cpp
@@ -0,0 +1,67 @@
+#include <util/generic/yexception.h>
+#include <util/system/align.h>
+
+#include <library/cpp/on_disk/chunks/reader.h>
+
+#include "multiblob.h"
+
+// Parses `multi` as a multiblob and appends one sub-blob per stored record to *this.
+// Sub-blobs are views into `multi`, which is also kept in `Multi`.
+//
+// If neither signature matches, the data is handed to ReadChunkedData() (legacy
+// TChunkedDataReader files). Throws yexception when the blob is too small, has a wrong
+// signature, or when a size record / sub-blob lies outside of `multi`.
+void TSubBlobs::ReadMultiBlob(const TBlob& multi) {
+    if (multi.Size() < sizeof(TMultiBlobHeader)) {
+        ythrow yexception() << "not a blob, too small";
+    }
+
+    Multi = multi;
+    memcpy((void*)&Header, Multi.Data(), sizeof(TMultiBlobHeader));
+
+    if (Header.BlobMetaSig != BLOBMETASIG) {
+        if (Header.BlobRecordSig != TMultiBlobHeader::RecordSig) {
+            if (ReadChunkedData(multi))
+                return;
+        }
+        ythrow yexception() << "is not a blob, MetaSig was read: "
+                            << Header.BlobMetaSig
+                            << ", must be " << BLOBMETASIG;
+    }
+
+    if (Header.BlobRecordSig != TMultiBlobHeader::RecordSig)
+        ythrow yexception() << "unknown multiblob RecordSig "
+                            << Header.BlobRecordSig;
+
+    const size_t total = multi.Size();
+    const char* const base = (const char*)multi.Data();
+
+    // Both layouts store one ui64 size per sub-blob after the header, so a larger Count
+    // cannot fit. Checked before reserve() so a corrupt Count is reported clearly.
+    if (Header.Count > 0 &&
+        (total < Header.HeaderSize() || Header.Count > (total - Header.HeaderSize()) / sizeof(ui64)))
+    {
+        ythrow yexception() << "multiblob is truncated: " << Header.Count
+                            << " sub blobs do not fit into " << total << " bytes";
+    }
+
+    reserve(size() + Header.Count);
+    if (Header.Flags & EMF_INTERLAY) {
+        size_t pos = Header.HeaderSize();
+        for (size_t i = 0; i < Header.Count; ++i) {
+            pos = AlignUp<ui64>(pos, sizeof(ui64));
+            if (pos > total || total - pos < sizeof(ui64))
+                ythrow yexception() << "multiblob is truncated: size of sub blob " << i << " is out of bounds";
+            ui64 size = 0;
+            memcpy(&size, base + pos, sizeof(size));
+            pos = AlignUp<ui64>(pos + sizeof(ui64), Header.Align);
+            if (pos > total || size > total - pos)
+                ythrow yexception() << "multiblob is truncated: sub blob " << i << " is out of bounds";
+            push_back(multi.SubBlob(pos, pos + size));
+            pos += size;
+        }
+    } else {
+        const ui64* sizes = Header.Sizes(multi.Data());
+        size_t pos = Header.HeaderSize() + Header.Count * sizeof(ui64);
+        for (size_t i = 0; i < Header.Count; ++i) {
+            pos = AlignUp<ui64>(pos, Header.Align);
+            if (pos > total || *sizes > total - pos)
+                ythrow yexception() << "multiblob is truncated: sub blob " << i << " is out of bounds";
+            push_back(multi.SubBlob(pos, pos + *sizes));
+            pos += *sizes;
+            sizes++;
+        }
+    }
+}
+
+// Reads `multi` as legacy TChunkedDataReader data: one sub-blob per block. The header
+// is zeroed, Count is set to the block count and EMF_CHUNKED_DATA_READER is raised.
+// Always returns true.
+bool TSubBlobs::ReadChunkedData(const TBlob& multi) noexcept {
+    Multi = multi;
+    memset((void*)&Header, 0, sizeof(Header));
+
+    TChunkedDataReader reader(Multi);
+    Header.Count = reader.GetBlocksCount();
+    resize(GetHeader()->Count);
+    const size_t blockCount = size();
+    for (size_t block = 0; block < blockCount; ++block) {
+        // TBlob::NoCopy() is fine here because reader.GetBlock() returns an address
+        // inside the memory of the multi blob. This relies on how TChunkedDataReader is
+        // implemented, so revisit it if that implementation changes.
+        (*this)[block] = TBlob::NoCopy(reader.GetBlock(block), reader.GetBlockLen(block));
+    }
+    Header.Flags |= EMF_CHUNKED_DATA_READER;
+    return true;
+}
diff --git a/library/cpp/on_disk/multi_blob/multiblob.h b/library/cpp/on_disk/multi_blob/multiblob.h
new file mode 100644
index 0000000000..b40a5ae6af
--- /dev/null
+++ b/library/cpp/on_disk/multi_blob/multiblob.h
@@ -0,0 +1,77 @@
+#pragma once
+
+#include <util/generic/vector.h>
+#include <util/memory/blob.h>
+
+#define BLOBMETASIG 0x3456789Au // value expected in TMultiBlobHeader::BlobMetaSig
+
+enum E_Multiblob_Flags { // bit flags stored in TMultiBlobHeader::Flags
+ // Layout when EMF_INTERLAY is clear
+ // (all sizes first, then the blobs):
+ // HeaderSize() bytes for TMultiBlobHeader
+ // Count*sizeof(ui64) bytes for blob sizes
+ // blob1
+ // (alignment)
+ // blob2
+ // (alignment)
+ // ...
+ // (alignment)
+ // blobn
+ // Layout when EMF_INTERLAY is set
+ // (each size directly precedes its blob):
+ // HeaderSize() bytes for TMultiBlobHeader
+ // size1 ui64, the size of 1st blob
+ // blob1
+ // (alignment)
+ // size2 ui64, the size of 2nd blob
+ // blob2
+ // (alignment)
+ // ...
+ // (alignment)
+ // sizen ui64, the size of n'th blob
+ // blobn
+ EMF_INTERLAY = 1,
+
+ // Means that multiblob contains blocks in TChunkedDataReader format
+ // Legacy, use it only for old files, created for TChunkedDataReader
+ EMF_CHUNKED_DATA_READER = 2,
+
+ // Mask of the flags that client code may pass to the blob builder
+ EMF_WRITEABLE = EMF_INTERLAY,
+};
+
+// Fixed header at the start of a multiblob. It is read and written as raw bytes, so
+// the member order and types define the on-disk layout.
+struct TMultiBlobHeader {
+    // data
+    ui32 BlobMetaSig;   // must equal BLOBMETASIG
+    ui32 BlobRecordSig; // must equal RecordSig
+    ui64 Count;         // count of sub blobs
+    ui32 Align;         // alignment for every subblob
+    ui32 Flags;         // combination of E_Multiblob_Flags
+    static const ui32 RecordSig = 0x23456789;
+    // Bytes reserved for the header in the file.
+    static size_t HeaderSize() {
+        return sizeof(ui64) * 4;
+    }
+    // Start of the size table that follows the header (layout without EMF_INTERLAY).
+    const ui64* Sizes(const void* Data) const {
+        const char* const bytes = static_cast<const char*>(Data);
+        return reinterpret_cast<const ui64*>(bytes + HeaderSize());
+    }
+};
+
+// Vector of the sub-blobs stored in a multiblob. The elements are views into `Multi`,
+// which keeps the underlying data alive.
+class TSubBlobs: public TVector<TBlob> {
+public:
+    // Creates an empty collection with an all-zero header.
+    TSubBlobs() {
+    }
+    // Parses `multi`; throws yexception on malformed data (see ReadMultiBlob).
+    TSubBlobs(const TBlob& multi) {
+        ReadMultiBlob(multi);
+    }
+    void ReadMultiBlob(const TBlob& multi);
+    // Header of the last parsed multiblob, or all zeros if nothing was parsed yet.
+    const TMultiBlobHeader* GetHeader() const {
+        return &Header;
+    }
+
+protected:
+    // Value-initialized so GetHeader() never exposes indeterminate memory on a
+    // default-constructed object.
+    TMultiBlobHeader Header{};
+    TBlob Multi;
+
+private:
+    bool ReadChunkedData(const TBlob& multi) noexcept;
+};
diff --git a/library/cpp/on_disk/multi_blob/multiblob_builder.cpp b/library/cpp/on_disk/multi_blob/multiblob_builder.cpp
new file mode 100644
index 0000000000..44aa4a6c2f
--- /dev/null
+++ b/library/cpp/on_disk/multi_blob/multiblob_builder.cpp
@@ -0,0 +1,146 @@
+#include <util/memory/tempbuf.h>
+#include <util/system/align.h>
+
+#include "multiblob_builder.h"
+
+/*
+ * TBlobSaverMemory
+ */
+TBlobSaverMemory::TBlobSaverMemory(const void* ptr, size_t size) // wraps [ptr, ptr + size) via TBlob::NoCopy
+ : Blob(TBlob::NoCopy(ptr, size))
+{
+}
+
+TBlobSaverMemory::TBlobSaverMemory(const TBlob& blob) // keeps a copy of the TBlob handle
+ : Blob(blob)
+{
+}
+
+void TBlobSaverMemory::Save(IOutputStream& output, ui32 /*flags*/) { // writes the whole blob to `output`; flags are ignored
+ output.Write((void*)Blob.Data(), Blob.Length());
+}
+
+size_t TBlobSaverMemory::GetLength() { // number of bytes Save() writes
+ return Blob.Length();
+}
+
+/*
+ * TBlobSaverFile
+ */
+
+TBlobSaverFile::TBlobSaverFile(TFile file) // takes an already opened file
+ : File(file)
+{
+ Y_ASSERT(File.IsOpen());
+}
+
+TBlobSaverFile::TBlobSaverFile(const char* filename, EOpenMode oMode) // opens `filename` with mode `oMode`
+ : File(filename, oMode)
+{
+ Y_ASSERT(File.IsOpen());
+}
+
+// Copies the file to `output` in chunks of up to 1 MiB, reading until File.Read()
+// returns 0. Flags are ignored.
+// NOTE(review): File is not rewound first - presumably reading continues from the
+// file's current position; confirm before calling Save() more than once.
+void TBlobSaverFile::Save(IOutputStream& output, ui32 /*flags*/) {
+    TTempBuf chunk(1 << 20);
+    for (;;) {
+        const size_t got = File.Read((void*)chunk.Data(), chunk.Size());
+        if (!got)
+            break;
+        output.Write((void*)chunk.Data(), got);
+    }
+}
+
+size_t TBlobSaverFile::GetLength() { // length reported by the underlying file
+ return File.GetLength();
+}
+
+/*
+ * TMultiBlobBuilder
+ */
+
+TMultiBlobBuilder::TMultiBlobBuilder(bool isOwn) // isOwn: whether the destructor deletes the added savers
+ : IsOwner(isOwn)
+{
+}
+
+TMultiBlobBuilder::~TMultiBlobBuilder() { // deletes the added savers only when this builder owns them
+ if (IsOwner)
+ DeleteSubBlobs();
+}
+
+namespace {
+    // Writes zero bytes to `output` until position `fromPos` reaches the next multiple
+    // of `align`; returns that aligned position.
+    ui64 PadToAlign(IOutputStream& output, ui64 fromPos, ui32 align) {
+        const ui64 alignedPos = AlignUp<ui64>(fromPos, align);
+        while (fromPos < alignedPos) {
+            output << (char)0;
+            ++fromPos;
+        }
+        return alignedPos;
+    }
+}
+
+// Serializes all added blobs as one multiblob into `output`.
+//   flags - only the EMF_WRITEABLE bits are honoured; EMF_INTERLAY selects the layout
+//           where each size record directly precedes its blob.
+// The output is zero-padded at the end up to GetLength() bytes.
+void TMultiBlobBuilder::Save(IOutputStream& output, ui32 flags) {
+    TMultiBlobHeader hdr;
+    memset((void*)&hdr, 0, sizeof(hdr));
+    hdr.BlobMetaSig = BLOBMETASIG;
+    hdr.BlobRecordSig = TMultiBlobHeader::RecordSig;
+    hdr.Count = Blobs.size();
+    hdr.Align = ALIGN;
+    hdr.Flags = flags & EMF_WRITEABLE;
+    output.Write((void*)&hdr, sizeof(hdr));
+    // The header occupies HeaderSize() bytes in the file; fill the rest with zeros.
+    for (size_t filled = sizeof(hdr); filled < hdr.HeaderSize(); ++filled)
+        output << (char)0;
+
+    ui64 written = hdr.HeaderSize();
+    const bool interlay = (hdr.Flags & EMF_INTERLAY) != 0;
+    if (interlay) {
+        for (size_t idx = 0; idx < Blobs.size(); ++idx) {
+            ui64 blobSize = Blobs[idx]->GetLength();
+            written = PadToAlign(output, written, sizeof(ui64)); // Align size record
+            output.Write((void*)&blobSize, sizeof(ui64));
+            written = PadToAlign(output, written + sizeof(ui64), hdr.Align); // Align blob
+            Blobs[idx]->Save(output, hdr.Flags);
+            written += blobSize;
+        }
+    } else {
+        // Size table first ...
+        for (size_t idx = 0; idx < Blobs.size(); ++idx) {
+            ui64 blobSize = Blobs[idx]->GetLength();
+            output.Write((void*)&blobSize, sizeof(ui64));
+        }
+        written += Blobs.size() * sizeof(ui64);
+        // ... then every blob at an aligned position.
+        for (size_t idx = 0; idx < Blobs.size(); ++idx) {
+            written = PadToAlign(output, written, hdr.Align);
+            Blobs[idx]->Save(output, hdr.Flags);
+            written += Blobs[idx]->GetLength();
+        }
+    }
+    // Compensate for imprecise size
+    const ui64 reported = GetLength();
+    while (written < reported) {
+        output << (char)0;
+        ++written;
+    }
+}
+
+// Upper bound of the serialized size. Sizes may differ with and without EMF_INTERLAY,
+// so both layouts are measured and the greater of the two is returned.
+size_t TMultiBlobBuilder::GetLength() {
+    size_t plainLen = TMultiBlobHeader::HeaderSize() + Blobs.size() * sizeof(ui64);
+    size_t interlayLen = TMultiBlobHeader::HeaderSize();
+    for (size_t idx = 0; idx < Blobs.size(); ++idx) {
+        // interlaced layout: aligned size record, then the aligned blob
+        interlayLen = AlignUp<ui64>(interlayLen, sizeof(ui64)) + sizeof(ui64);
+        interlayLen = AlignUp<ui64>(interlayLen, ALIGN) + Blobs[idx]->GetLength();
+        // plain layout: just the aligned blob
+        plainLen = AlignUp<ui64>(plainLen, ALIGN) + Blobs[idx]->GetLength();
+    }
+    interlayLen = AlignUp<ui64>(interlayLen, ALIGN);
+    plainLen = AlignUp<ui64>(plainLen, ALIGN);
+    return Max(plainLen, interlayLen);
+}
+
+TMultiBlobBuilder::TSavers& TMultiBlobBuilder::GetBlobs() { // mutable access to the list of savers
+ return Blobs;
+}
+
+const TMultiBlobBuilder::TSavers& TMultiBlobBuilder::GetBlobs() const { // read-only access to the list of savers
+ return Blobs;
+}
+
+void TMultiBlobBuilder::AddBlob(IBlobSaverBase* blob) { // appends `blob`; it is deleted by this builder only if IsOwner is set
+ Blobs.push_back(blob);
+}
+
+// Deletes every added saver and empties the list.
+void TMultiBlobBuilder::DeleteSubBlobs() {
+    for (IBlobSaverBase* saver : Blobs)
+        delete saver;
+    Blobs.clear();
+}
diff --git a/library/cpp/on_disk/multi_blob/multiblob_builder.h b/library/cpp/on_disk/multi_blob/multiblob_builder.h
new file mode 100644
index 0000000000..a8e3c6d35e
--- /dev/null
+++ b/library/cpp/on_disk/multi_blob/multiblob_builder.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include <util/system/align.h>
+#include <util/stream/output.h>
+#include <util/stream/file.h>
+#include <util/draft/holder_vector.h>
+
+#include "multiblob.h"
+
+ // Abstract interface of an object that can report its length and write
+ // itself to an output stream.
+ class IBlobSaverBase {
+ public:
+ virtual ~IBlobSaverBase() {
+ }
+ // Writes the object to `output`; `flags` defaults to 0.
+ // NOTE(review): flag values are defined elsewhere (presumably multiblob.h) — confirm.
+ virtual void Save(IOutputStream& output, ui32 flags = 0) = 0;
+ // Returns the length of the data; TMultiBlobBuilder uses it for size records and offsets.
+ virtual size_t GetLength() = 0;
+ };
+
+ // Convenience wrapper: saves `saver` into `output` with the default flags (0).
+ inline void MultiBlobSave(IOutputStream& output, IBlobSaverBase& saver) {
+ saver.Save(output);
+ }
+
+ // IBlobSaverBase implementation whose payload is held in a TBlob member.
+ class TBlobSaverMemory: public IBlobSaverBase {
+ public:
+ // Builds the saver from a raw memory range.
+ // NOTE(review): whether the bytes are copied or only referenced is decided in the .cpp — confirm.
+ TBlobSaverMemory(const void* ptr, size_t size);
+ // Builds the saver from an existing blob.
+ TBlobSaverMemory(const TBlob& blob);
+ void Save(IOutputStream& output, ui32 flags = 0) override;
+ size_t GetLength() override;
+
+ private:
+ TBlob Blob;
+ };
+
+ // IBlobSaverBase implementation whose payload comes from a TFile member.
+ class TBlobSaverFile: public IBlobSaverBase {
+ public:
+ // Uses an already opened file.
+ TBlobSaverFile(TFile file);
+ // Opens `filename` with `oMode` (read-only by default).
+ TBlobSaverFile(const char* filename, EOpenMode oMode = RdOnly);
+ void Save(IOutputStream& output, ui32 flags = 0) override;
+ size_t GetLength() override;
+
+ protected:
+ TFile File;
+ };
+
+ // Composite saver: collects several IBlobSaverBase objects and writes them out
+ // as one multi-blob. It is itself an IBlobSaverBase, so builders can be nested.
+ class TMultiBlobBuilder: public IBlobSaverBase {
+ protected:
+ // Data will be stored with default alignment DEVTOOLS-4548
+ static const size_t ALIGN = 16;
+
+ public:
+ typedef TVector<IBlobSaverBase*> TSavers;
+
+ // NOTE(review): `isOwn` presumably makes the builder delete its sub-blobs on destruction — confirm in the .cpp.
+ TMultiBlobBuilder(bool isOwn = true);
+ ~TMultiBlobBuilder() override;
+ void Save(IOutputStream& output, ui32 flags = 0) override;
+ // Returns the larger of the sizes computed for the interleaved and the flat layout.
+ size_t GetLength() override;
+ TSavers& GetBlobs();
+ const TSavers& GetBlobs() const;
+ // Appends a sub-blob saver (raw pointer is stored).
+ void AddBlob(IBlobSaverBase* blob);
+ // Deletes every stored sub-blob saver and clears the list.
+ void DeleteSubBlobs();
+
+ protected:
+ TSavers Blobs;
+ bool IsOwner;
+ };
diff --git a/library/cpp/on_disk/multi_blob/ya.make b/library/cpp/on_disk/multi_blob/ya.make
new file mode 100644
index 0000000000..50615fc901
--- /dev/null
+++ b/library/cpp/on_disk/multi_blob/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+
+SRCS(
+ multiblob.cpp
+ multiblob_builder.cpp
+)
+
+PEERDIR(
+ library/cpp/on_disk/chunks
+ util/draft
+)
+
+END()
diff --git a/library/cpp/on_disk/st_hash/fake.cpp b/library/cpp/on_disk/st_hash/fake.cpp
new file mode 100644
index 0000000000..ef5af4d432
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/fake.cpp
@@ -0,0 +1,4 @@
+#include "save_stl.h"
+#include "static_hash.h"
+#include "static_hash_map.h"
+#include "sthash_iterators.h"
diff --git a/library/cpp/on_disk/st_hash/save_stl.h b/library/cpp/on_disk/st_hash/save_stl.h
new file mode 100644
index 0000000000..00f8f0e20d
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/save_stl.h
@@ -0,0 +1,84 @@
+#pragma once
+
+#include <util/generic/hash.h>
+#include <util/system/yassert.h>
+#include <util/stream/output.h>
+
+ // this structure might be replaced with sthashtable class
+ // Header record that save_for_st() writes verbatim (raw bytes of the struct)
+ // at the start of a serialized hash table. Field order matters: it is the
+ // on-disk layout.
+ template <class HF, class Eq, class size_type>
+ struct sthashtable_nvm_sv {
+ sthashtable_nvm_sv() {
+ // If the struct is larger than the sum of its members it contains padding;
+ // zero everything so the padding bytes written to the stream are defined.
+ if (sizeof(sthashtable_nvm_sv) != sizeof(HF) + sizeof(Eq) + 3 * sizeof(size_type)) {
+ memset(this, 0, sizeof(sthashtable_nvm_sv));
+ }
+ }
+
+ // Delegates to the default constructor first (padding zeroing), then assigns the fields.
+ sthashtable_nvm_sv(const HF& phf, const Eq& peq, const size_type& pnb, const size_type& pne, const size_type& pnd)
+ : sthashtable_nvm_sv()
+ {
+ hf = phf;
+ eq = peq;
+ num_buckets = pnb;
+ num_elements = pne;
+ data_end_off = pnd;
+ }
+
+ HF hf;
+ Eq eq;
+ size_type num_buckets; // number of buckets in the saved table
+ size_type num_elements; // number of stored elements
+ size_type data_end_off; // offset (from the start of the header) just past the last record
+ };
+
+/**
+ * Some hack to save both THashMap and sthash.
+ * Working with stHash does not depend on the template parameters, because the content of stHash is not used inside this method.
+ */
+template <class V, class K, class HF, class Ex, class Eq, class A>
+template <class KeySaver>
+inline int THashTable<V, K, HF, Ex, Eq, A>::save_for_st(IOutputStream* stream, KeySaver& ks, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash) const {
+ Y_ASSERT(!stHash || stHash->bucket_count() == bucket_count());
+ typedef sthashtable_nvm_sv<HF, Eq, typename KeySaver::TSizeType> sv_type;
+ sv_type sv = {this->_get_hash_fun(), this->_get_key_eq(), static_cast<typename KeySaver::TSizeType>(buckets.size()), static_cast<typename KeySaver::TSizeType>(num_elements), 0};
+ // to do: m.b. use just the size of corresponding object?
+ typename KeySaver::TSizeType cur_off = sizeof(sv_type) +
+ (sv.num_buckets + 1) * sizeof(typename KeySaver::TSizeType);
+ sv.data_end_off = cur_off;
+ const_iterator n;
+ for (n = begin(); n != end(); ++n) {
+ sv.data_end_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(*n));
+ }
+ typename KeySaver::TSizeType* sb = stHash ? (typename KeySaver::TSizeType*)(stHash->buckets()) : nullptr;
+ if (stHash)
+ sv.data_end_off += static_cast<typename KeySaver::TSizeType>(sb[buckets.size()] - sb[0]);
+ //saver.Align(sizeof(char*));
+ stream->Write(&sv, sizeof(sv));
+
+ size_type i;
+ //save vector
+ for (i = 0; i < buckets.size(); ++i) {
+ node* cur = buckets[i];
+ stream->Write(&cur_off, sizeof(cur_off));
+ if (cur) {
+ while (!((uintptr_t)cur & 1)) {
+ cur_off += static_cast<typename KeySaver::TSizeType>(ks.GetRecordSize(cur->val));
+ cur = cur->next;
+ }
+ }
+ if (stHash)
+ cur_off += static_cast<typename KeySaver::TSizeType>(sb[i + 1] - sb[i]);
+ }
+ stream->Write(&cur_off, sizeof(cur_off)); // end mark
+ for (i = 0; i < buckets.size(); ++i) {
+ node* cur = buckets[i];
+ if (cur) {
+ while (!((uintptr_t)cur & 1)) {
+ ks.SaveRecord(stream, cur->val);
+ cur = cur->next;
+ }
+ }
+ if (stHash)
+ stream->Write((const char*)stHash + sb[i], sb[i + 1] - sb[i]);
+ }
+ return 0;
+}
diff --git a/library/cpp/on_disk/st_hash/static_hash.h b/library/cpp/on_disk/st_hash/static_hash.h
new file mode 100644
index 0000000000..ca7a6ccd36
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/static_hash.h
@@ -0,0 +1,420 @@
+#pragma once
+
+#include "save_stl.h"
+#include "sthash_iterators.h"
+
+#include <util/generic/hash.h>
+#include <util/generic/vector.h>
+#include <util/generic/buffer.h>
+#include <util/generic/cast.h>
+#include <util/generic/yexception.h> // for save/load only
+#include <util/stream/file.h>
+#include <util/stream/buffer.h>
+#include <utility>
+
+#include <memory>
+#include <algorithm>
+#include <functional>
+
+#include <cstdlib>
+#include <cstddef>
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4624) // 'destructor could not be generated because a base class destructor is inaccessible'
+#endif
+
+ // Serializes `hash` into `stream` using a default-constructed KeySaver.
+ // Throws yexception when save_for_st() reports a non-zero status.
+ template <class HashType, class KeySaver>
+ inline void SaveHashToStreamEx(HashType& hash, IOutputStream* stream) {
+     KeySaver keySaver;
+     const auto status = hash.save_for_st(stream, keySaver);
+     if (status) {
+         ythrow yexception() << "Could not save hash to stream";
+     }
+ }
+
+ // Serializes `hash` into `stream` with the default record writer and 64-bit offsets.
+ template <class HashType>
+ inline void SaveHashToStream(HashType& hash, IOutputStream* stream) {
+     using KeySaver = TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64>;
+     SaveHashToStreamEx<HashType, KeySaver>(hash, stream);
+ }
+
+ // Serializes `hash` into the file `fileName` using the given KeySaver type.
+ template <class HashType, class KeySaver>
+ inline void SaveHashToFileEx(HashType& hash, const char* fileName) {
+     TFileOutput fileStream(fileName);
+     IOutputStream* const sink = &fileStream;
+     SaveHashToStreamEx<HashType, KeySaver>(hash, sink);
+ }
+
+ // Serializes a hash map into `fileName` with the default record writer and 64-bit offsets.
+ template <class HashType>
+ inline void SaveHashToFile(HashType& hash, const char* fileName) {
+     using KeySaver = TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64>;
+     SaveHashToFileEx<HashType, KeySaver>(hash, fileName);
+ }
+
+ // Serializes a hash set (keys only) into `fileName` with 64-bit offsets.
+ template <class HashType>
+ inline void SaveHashSetToFile(HashType& hash, const char* fileName) {
+     using KeySaver = TSthashSetWriter<typename HashType::key_type, ui64>;
+     SaveHashToFileEx<HashType, KeySaver>(hash, fileName);
+ }
+
+ // Same as SaveHashToFile, but offsets and counters are stored as ui32.
+ template <class HashType>
+ inline void SaveHashToFile32(HashType& hash, const char* fileName) {
+     using KeySaver = TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui32>;
+     SaveHashToFileEx<HashType, KeySaver>(hash, fileName);
+ }
+
+ // Serializes `hash` (optionally merged with the data of `stHash`) into `buffer`.
+ // Throws yexception when save_for_st() reports a non-zero status.
+ template <class HashType, class KeySaver>
+ inline void SaveHashToBufferEx(HashType& hash, TBuffer& buffer, sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* stHash = nullptr) {
+     TBufferOutput out(buffer);
+     KeySaver keySaver;
+     const auto status = hash.save_for_st(&out, keySaver, stHash);
+     if (status) {
+         ythrow yexception() << "Could not save hash to memory";
+     }
+ }
+
+ // Serializes `hash` into `buffer` with the default record writer and 64-bit offsets.
+ template <class HashType>
+ inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer) {
+     using KeySaver = TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64>;
+     SaveHashToBufferEx<HashType, KeySaver>(hash, buffer);
+ }
+
+/**
+ * Some hack to save both THashMap and sthash.
+ * THashMap and sthash must have same bucket_count().
+ */
+template <class HashType, class StHashType>
+inline void SaveHashToBuffer(HashType& hash, TBuffer& buffer, StHashType* stHash) {
+ typedef TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui64> KeySaver;
+ typedef sthash<int, int, THash<int>, TEqualTo<int>, typename KeySaver::TSizeType>* SH;
+
+ SH sh = reinterpret_cast<SH>(stHash);
+ SaveHashToBufferEx<HashType, KeySaver>(hash, buffer, sh);
+}
+
+ // Same as SaveHashToBuffer, but offsets and counters are stored as ui32.
+ template <class HashType>
+ inline void SaveHashToBuffer32(HashType& hash, TBuffer& buffer) {
+     using KeySaver = TSthashWriter<typename HashType::key_type, typename HashType::mapped_type, ui32>;
+     SaveHashToBufferEx<HashType, KeySaver>(hash, buffer);
+ }
+
+ /**
+  * Read-only view of a hash table serialized by THashTable::save_for_st().
+  *
+  * The object is never constructed: a pointer to the start of the serialized
+  * data is cast to sthashtable*. The data members therefore mirror the field
+  * order of sthashtable_nvm_sv (hasher, key_equal, num_buckets, num_elements,
+  * data_end_off) and must not be reordered. The table of bucket offsets
+  * follows immediately after the object; all offsets are relative to `this`.
+  */
+ template <class Iter, typename size_type_f = ui64>
+ class sthashtable {
+ public:
+     typedef typename Iter::TKeyType key_type;
+     typedef typename Iter::TValueType value_type;
+     typedef typename Iter::THasherType hasher;
+     typedef typename Iter::TKeyEqualType key_equal;
+
+     typedef size_type_f size_type;
+     typedef ptrdiff_t difference_type;
+     typedef const value_type* const_pointer;
+     typedef const value_type& const_reference;
+
+     typedef Iter const_iterator;
+
+     const hasher hash_funct() const {
+         return hash;
+     }
+     const key_equal key_eq() const {
+         return equals;
+     }
+
+ private:
+     const hasher hash;
+     const key_equal equals;
+
+ private:
+     // Iterator to the first record of `bucket`; `bucket` must be < num_buckets.
+     const_iterator iter_at_bucket(size_type bucket) const {
+         return (const_iterator)(((char*)this + buckets()[bucket]));
+     }
+
+     // Same as iter_at_bucket(), but yields end() for an out-of-range bucket.
+     const_iterator iter_at_bucket_or_end(size_type bucket) const {
+         if (bucket < num_buckets)
+             return (const_iterator)(((char*)this + buckets()[bucket]));
+         else
+             return end();
+     }
+
+     const size_type num_buckets;
+     const size_type num_elements;
+     const size_type data_end_off;
+
+ protected: //shut up gcc warning
+     // we can't construct/destroy this object at all!
+     sthashtable();
+     sthashtable(const sthashtable& ht);
+     ~sthashtable();
+
+ public:
+     // const size_type *buckets;
+     // Pointer to the bucket offset table stored right after this object.
+     const size_type* buckets() const {
+         return (size_type*)((char*)this + sizeof(*this));
+     }
+     const size_type buckets(size_type n) const {
+         return buckets()[n];
+     }
+
+     size_type size() const {
+         return num_elements;
+     }
+     size_type max_size() const {
+         return size_type(-1);
+     }
+     bool empty() const {
+         return size() == 0;
+     }
+
+     const_iterator begin() const {
+         return num_buckets ? iter_at_bucket(0) : end();
+     }
+
+     const_iterator end() const {
+         return (const_iterator)(((char*)this + data_end_off));
+     }
+
+ public:
+     // Total size of the serialized table in bytes (offset just past the last record).
+     size_type size_in_bytes() const {
+         return data_end_off;
+     }
+
+     size_type bucket_count() const {
+         return num_buckets;
+     }
+
+     // The sthash and sthash_mm wrappers forward to this member; without it their
+     // max_bucket_count() cannot be instantiated. The bound mirrors max_size():
+     // the largest value representable by size_type.
+     size_type max_bucket_count() const {
+         return size_type(-1);
+     }
+
+     size_type elems_in_bucket(size_type bucket) const {
+         size_type result = 0;
+         const_iterator first = iter_at_bucket(bucket);
+         const_iterator last = iter_at_bucket_or_end(bucket + 1);
+
+         for (; first != last; ++first)
+             ++result;
+         return result;
+     }
+
+     // Returns an iterator to the first record whose key equals `key`, or end().
+     template <class TheKey>
+     const_iterator find(const TheKey& key) const {
+         // A table without buckets holds nothing; bkt_num_key() would divide by zero.
+         if (!num_buckets)
+             return end();
+         size_type n = bkt_num_key(key);
+         const_iterator first(iter_at_bucket(n)), last(iter_at_bucket_or_end(n + 1));
+         for (;
+              first != last && !first.KeyEquals(equals, key);
+              ++first) {
+         }
+         if (first != last)
+             return first;
+         return end();
+     }
+
+     // Returns the number of records whose key equals `key`.
+     size_type count(const key_type& key) const {
+         // Same zero-bucket guard as in find().
+         if (!num_buckets)
+             return 0;
+         const size_type n = bkt_num_key(key);
+         size_type result = 0;
+         const_iterator first = iter_at_bucket(n);
+         const_iterator last = iter_at_bucket_or_end(n + 1);
+
+         for (; first != last; ++first)
+             if (first.KeyEquals(equals, key))
+                 ++result;
+         return result;
+     }
+
+     std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const;
+
+ private:
+     // Bucket index for `key`; callers must make sure num_buckets != 0.
+     template <class TheKey>
+     size_type bkt_num_key(const TheKey& key) const {
+         return hash(key) % num_buckets;
+     }
+ };
+
+ // Returns the range [first, last) of consecutive records inside the key's bucket
+ // whose keys equal `key`, or (end(), end()) when there is no such record.
+ template <class I, class size_type_f>
+ std::pair<I, I> sthashtable<I, size_type_f>::equal_range(const key_type& key) const {
+     typedef std::pair<const_iterator, const_iterator> pii;
+     // A table without buckets holds nothing; bkt_num_key() would divide by zero.
+     if (!num_buckets)
+         return pii(end(), end());
+     const size_type n = bkt_num_key(key);
+     const_iterator first = iter_at_bucket(n);
+     const_iterator last = iter_at_bucket_or_end(n + 1);
+
+     for (; first != last; ++first) {
+         if (first.KeyEquals(equals, key)) {
+             // Found the first match; extend the range while keys keep matching.
+             const_iterator cur = first;
+             ++cur;
+             for (; cur != last; ++cur)
+                 if (!cur.KeyEquals(equals, key))
+                     return pii(const_iterator(first),
+                                const_iterator(cur));
+             return pii(const_iterator(first),
+                        const_iterator(last));
+         }
+     }
+     return pii(end(), end());
+ }
+
+/* end __SGI_STL_HASHTABLE_H */
+
+ // Read-only hash map over serialized data; a thin facade that forwards every
+ // call to the underlying sthashtable, which is its only data member.
+ template <class Key, class T, class HashFcn /*= hash<Key>*/,
+           class EqualKey = TEqualTo<Key>, typename size_type_f = ui64>
+ class sthash {
+ private:
+     using ht = sthashtable<TSthashIterator<const Key, const T, HashFcn, EqualKey>, size_type_f>;
+     ht rep;
+
+ public:
+     using key_type = typename ht::key_type;
+     using value_type = typename ht::value_type;
+     using hasher = typename ht::hasher;
+     using key_equal = typename ht::key_equal;
+     using mapped_type = T;
+
+     using size_type = typename ht::size_type;
+     using difference_type = typename ht::difference_type;
+     using const_pointer = typename ht::const_pointer;
+     using const_reference = typename ht::const_reference;
+
+     using const_iterator = typename ht::const_iterator;
+
+     const hasher hash_funct() const {
+         return rep.hash_funct();
+     }
+     const key_equal key_eq() const {
+         return rep.key_eq();
+     }
+
+ public:
+     // Size queries.
+     size_type size() const {
+         return rep.size();
+     }
+     size_type max_size() const {
+         return rep.max_size();
+     }
+     bool empty() const {
+         return rep.empty();
+     }
+
+     // Iteration over all records.
+     const_iterator begin() const {
+         return rep.begin();
+     }
+     const_iterator end() const {
+         return rep.end();
+     }
+
+ public:
+     // Lookup.
+     template <class TheKey>
+     const_iterator find(const TheKey& key) const {
+         return rep.find(key);
+     }
+     template <class TheKey>
+     bool has(const TheKey& key) const {
+         return rep.find(key) != rep.end();
+     }
+
+     size_type count(const key_type& key) const {
+         return rep.count(key);
+     }
+
+     std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const {
+         return rep.equal_range(key);
+     }
+
+     // Layout and bucket information.
+     size_type size_in_bytes() const {
+         return rep.size_in_bytes();
+     }
+
+     size_type bucket_count() const {
+         return rep.bucket_count();
+     }
+     size_type max_bucket_count() const {
+         return rep.max_bucket_count();
+     }
+     size_type elems_in_bucket(size_type n) const {
+         return rep.elems_in_bucket(n);
+     }
+
+     const size_type* buckets() const {
+         return rep.buckets();
+     }
+     const size_type buckets(size_type n) const {
+         return rep.buckets()[n];
+     }
+ };
+
+ // Read-only hash set: an sthash whose mapped type is TEmptyValue, so only keys are stored.
+ template <class Key, class HashFcn,
+ class EqualKey = TEqualTo<Key>, typename size_type_f = ui64>
+ class sthash_set: public sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> {
+ typedef sthash<Key, TEmptyValue, HashFcn, EqualKey, size_type_f> Base;
+
+ public:
+ // Re-export the nested names of the base class.
+ using Base::const_iterator;
+ using Base::hasher;
+ using Base::key_equal;
+ using Base::key_type;
+ using Base::size_type;
+ using Base::value_type;
+ };
+
+ // Read-only facade over a serialized hash table, forwarding every call to the
+ // underlying sthashtable. Unlike sthash, the iterator is parameterized with a
+ // non-const mapped type.
+ template <class Key, class T, class HashFcn /*= hash<Key>*/,
+           class EqualKey = TEqualTo<Key>, typename size_type_f = ui64>
+ class sthash_mm {
+ private:
+     using ht = sthashtable<TSthashIterator<const Key, T, HashFcn, EqualKey>, size_type_f>;
+     ht rep;
+
+ public:
+     using key_type = typename ht::key_type;
+     using value_type = typename ht::value_type;
+     using hasher = typename ht::hasher;
+     using key_equal = typename ht::key_equal;
+     using mapped_type = T;
+
+     using size_type = typename ht::size_type;
+     using difference_type = typename ht::difference_type;
+     using const_pointer = typename ht::const_pointer;
+     using const_reference = typename ht::const_reference;
+
+     using const_iterator = typename ht::const_iterator;
+
+     const hasher hash_funct() const {
+         return rep.hash_funct();
+     }
+     const key_equal key_eq() const {
+         return rep.key_eq();
+     }
+
+ public:
+     // Size queries.
+     size_type size() const {
+         return rep.size();
+     }
+     size_type max_size() const {
+         return rep.max_size();
+     }
+     bool empty() const {
+         return rep.empty();
+     }
+
+     // Iteration over all records.
+     const_iterator begin() const {
+         return rep.begin();
+     }
+     const_iterator end() const {
+         return rep.end();
+     }
+
+     // Lookup.
+     const_iterator find(const key_type& key) const {
+         return rep.find(key);
+     }
+
+     size_type count(const key_type& key) const {
+         return rep.count(key);
+     }
+
+     std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const {
+         return rep.equal_range(key);
+     }
+
+     // Bucket information.
+     size_type bucket_count() const {
+         return rep.bucket_count();
+     }
+     size_type max_bucket_count() const {
+         return rep.max_bucket_count();
+     }
+     size_type elems_in_bucket(size_type n) const {
+         return rep.elems_in_bucket(n);
+     }
+ };
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
diff --git a/library/cpp/on_disk/st_hash/static_hash_map.h b/library/cpp/on_disk/st_hash/static_hash_map.h
new file mode 100644
index 0000000000..5dc50abd39
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/static_hash_map.h
@@ -0,0 +1,59 @@
+#pragma once
+
+#include "static_hash.h"
+
+#include <library/cpp/deprecated/mapped_file/mapped_file.h>
+
+#include <util/system/filemap.h>
+
+ // Maps a file produced by the SaveHashTo* helpers and exposes its contents as
+ // a read-only sthash-like object of type SH.
+ template <class SH>
+ struct sthash_mapped_c {
+     typedef SH H;
+     typedef typename H::const_iterator const_iterator;
+     TMappedFile M;
+     H* hsh; // points into the mapping owned by M; nullptr until Open() succeeds
+     sthash_mapped_c()
+         : M()
+         , hsh(nullptr)
+     {
+     }
+     sthash_mapped_c(const char* fname, bool precharge)
+         : M()
+         , hsh(nullptr)
+     {
+         Open(fname, precharge);
+     }
+     // Maps `fname` (optionally precharging it) and validates that the file is at
+     // least as large as the table header and that the table's end offset equals
+     // the file size. Throws yexception otherwise.
+     void Open(const char* fname, bool precharge) {
+         // Never expose a stale or unvalidated table: hsh stays null unless every
+         // step below succeeds.
+         hsh = nullptr;
+         M.init(fname);
+         if (precharge)
+             M.precharge();
+         H* candidate = (H*)M.getData();
+         // The size check must come first: end() reads the header fields.
+         if (M.getSize() < sizeof(H) || (ssize_t)M.getSize() != candidate->end().Data - (char*)candidate)
+             ythrow yexception() << "Could not map hash: " << fname << " is damaged";
+         hsh = candidate;
+     }
+     H* operator->() {
+         return hsh;
+     }
+     const H* operator->() const {
+         return hsh;
+     }
+     H* GetSthash() {
+         return hsh;
+     }
+     const H* GetSthash() const {
+         return hsh;
+     }
+ };
+
+ // Convenience wrapper: sthash_mapped_c specialized for sthash<Key, T, Hash>.
+ template <class Key, class T, class Hash>
+ struct sthash_mapped: public sthash_mapped_c<sthash<Key, T, Hash>> {
+     using H = sthash<Key, T, Hash>;
+
+     // Creates an unopened mapping.
+     sthash_mapped()
+         : sthash_mapped_c<H>()
+     {
+     }
+
+     // Maps and validates `fname` right away.
+     sthash_mapped(const char* fname, bool precharge)
+         : sthash_mapped_c<H>(fname, precharge)
+     {
+     }
+ };
diff --git a/library/cpp/on_disk/st_hash/sthash_iterators.h b/library/cpp/on_disk/st_hash/sthash_iterators.h
new file mode 100644
index 0000000000..6a9ebdd6c3
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/sthash_iterators.h
@@ -0,0 +1,334 @@
+#pragma once
+
+#include "save_stl.h"
+
+#include <util/system/align.h>
+
+/**
+ This file provides functionality for saving a relatively simple THashMap object
+ to disk in a form that can be mapped read-only (via mmap) at any address.
+ The saved object is accessed through a pointer to an sthash object (which must
+ have the same template parameters as the original THashMap object).
+
+ If either the key or the value is variable-sized (i.e. contains pointers), the user
+ must write their own specialization of TSthashIterator (read iterator for sthash)
+ and TSthashWriter (write iterator for THashMap).
+ Examples for pairs involving const char* are provided in this file.
+**/
+
+// TEmptyValue and SizeOfEx are helpers for sthash_set
+ // Tag type used as the mapped type of sthash_set; SizeOfEx reports it as
+ // occupying zero bytes in a serialized record.
+ struct TEmptyValue {
+ TEmptyValue() = default;
+ };
+
+ // Number of bytes a value of type T occupies inside a serialized record:
+ // sizeof(T) in general...
+ template <class T>
+ inline size_t SizeOfEx() {
+ return sizeof(T);
+ }
+
+ // ...and zero for TEmptyValue (const or not), so sets store keys only.
+ template <>
+ inline size_t SizeOfEx<TEmptyValue>() {
+ return 0;
+ }
+ template <>
+ inline size_t SizeOfEx<const TEmptyValue>() {
+ return 0;
+ }
+
+ // Read iterator over fixed-size records: the key occupies sizeof(TKey) bytes and
+ // is immediately followed by the value (SizeOfEx<TValue>() bytes).
+ template <class TKey, class TValue, class HashFcn, class EqualKey>
+ struct TSthashIterator {
+     // Implementation for simple types
+     using TKeyType = const TKey;
+     using TValueType = const TValue;
+     using TKeyEqualType = EqualKey;
+     using THasherType = HashFcn;
+
+     // Address of the current record.
+     const char* Data;
+
+     TSthashIterator()
+         : Data(nullptr)
+     {
+     }
+     explicit TSthashIterator(const char* data)
+         : Data(data)
+     {
+     }
+
+     // Advances to the next record.
+     void operator++() {
+         const size_t step = GetLength();
+         Data += step;
+     }
+
+     bool operator!=(const TSthashIterator& that) const {
+         return !(Data == that.Data);
+     }
+     bool operator==(const TSthashIterator& that) const {
+         return !(Data != that.Data);
+     }
+
+     TKey& Key() const {
+         TKey* const keyPtr = (TKey*)Data;
+         return *keyPtr;
+     }
+     TValue& Value() {
+         TValue* const valuePtr = (TValue*)(Data + sizeof(TKey));
+         return *valuePtr;
+     }
+     const TValue& Value() const {
+         const TValue* const valuePtr = (const TValue*)(Data + sizeof(TKey));
+         return *valuePtr;
+     }
+
+     // Compares the stored key with `key` using the supplied predicate.
+     template <class AnotherKeyType>
+     bool KeyEquals(const EqualKey& eq, const AnotherKeyType& key) const {
+         TKey& stored = *(TKey*)Data;
+         return eq(stored, key);
+     }
+
+     // Size of one record in bytes.
+     size_t GetLength() const {
+         const size_t keyBytes = sizeof(TKey);
+         return keyBytes + SizeOfEx<TValue>();
+     }
+ };
+
+ // Record writer for fixed-size keys and values: the raw bytes of the key are
+ // followed by the raw bytes of the value.
+ template <class Key, class Value, typename size_type_o = ui64>
+ struct TSthashWriter {
+     using TSizeType = size_type_o;
+
+     // Size of one serialized record in bytes (independent of the record contents).
+     size_t GetRecordSize(const std::pair<const Key, const Value>&) const {
+         const size_t keyBytes = sizeof(Key);
+         return keyBytes + SizeOfEx<Value>();
+     }
+
+     // Writes the key and then the value to `stream`; always returns 0.
+     int SaveRecord(IOutputStream* stream, const std::pair<const Key, const Value>& record) const {
+         const Key* const keyPtr = &record.first;
+         const Value* const valuePtr = &record.second;
+         stream->Write(keyPtr, sizeof(Key));
+         stream->Write(valuePtr, SizeOfEx<Value>());
+         return 0;
+     }
+ };
+
+ // Record writer for hash sets: wraps each key into a (key, TEmptyValue) pair and
+ // delegates to TSthashWriter<Key, TEmptyValue>.
+ // Note that this simplified implementation makes a copy of `key' in std::make_pair.
+ // It can also waste some memory on undesired alignment.
+ template <class Key, typename size_type_o = ui64>
+ struct TSthashSetWriter: public TSthashWriter<Key, TEmptyValue, size_type_o> {
+ typedef TSthashWriter<Key, TEmptyValue, size_type_o> MapWriter;
+ // Size of the serialized record for `key`.
+ size_t GetRecordSize(const Key& key) const {
+ return MapWriter::GetRecordSize(std::make_pair(key, TEmptyValue()));
+ }
+ // Writes `key` to `stream`; returns the result of the map writer.
+ int SaveRecord(IOutputStream* stream, const Key& key) const {
+ return MapWriter::SaveRecord(stream, std::make_pair(key, TEmptyValue()));
+ }
+ };
+
+ // We can't save something with pointers without additional tricks.
+ // The specializations below are intentionally empty, so using a pointer key or
+ // value without a dedicated specialization fails to compile.
+
+ template <class A, class B, class HashFcn, class EqualKey>
+ struct TSthashIterator<A*, B, HashFcn, EqualKey> {};
+
+ template <class A, class B, class HashFcn, class EqualKey>
+ struct TSthashIterator<A, B*, HashFcn, EqualKey> {};
+
+ template <class A, class B, typename size_type_o>
+ struct TSthashWriter<A*, B*, size_type_o> {};
+
+ template <class A, class B, typename size_type_o>
+ struct TSthashWriter<A*, B, size_type_o> {};
+
+ template <class A, class B, typename size_type_o>
+ struct TSthashWriter<A, B*, size_type_o> {};
+
+ // Alignment (in bytes) of records that have a C-string key and a value of type T.
+ template <class T>
+ inline size_t AlignForChrKey() {
+ return 4; // TODO: change this (requires rebuilding a few existing files)
+ }
+
+ // Records without a value (sets) are not padded at all.
+ template <>
+ inline size_t AlignForChrKey<TEmptyValue>() {
+ return 1;
+ }
+
+ template <>
+ inline size_t AlignForChrKey<const TEmptyValue>() {
+ return AlignForChrKey<TEmptyValue>();
+ }
+
+ // Read iterator for records with a C-string key.
+ // !! Note that for char* keys the physical placement of key and value is swapped:
+ // the value comes first, followed by the NUL-terminated key and padding.
+ template <class TValue, class HashFcn, class EqualKey>
+ struct TSthashIterator<const char* const, TValue, HashFcn, EqualKey> {
+     using TValueType = const TValue;
+     using TKeyType = const char*;
+     using TKeyEqualType = EqualKey;
+     using THasherType = HashFcn;
+
+     // Address of the current record.
+     const char* Data;
+
+     TSthashIterator()
+         : Data(nullptr)
+     {
+     }
+     TSthashIterator(const char* data)
+         : Data(data)
+     {
+     }
+
+     // Advances to the next record.
+     void operator++() {
+         const size_t step = GetLength();
+         Data += step;
+     }
+
+     bool operator!=(const TSthashIterator& that) const {
+         return !(Data == that.Data);
+     }
+     bool operator==(const TSthashIterator& that) const {
+         return !(Data != that.Data);
+     }
+
+     // The key string starts right after the value bytes.
+     const char* Key() const {
+         const size_t valueBytes = SizeOfEx<TValue>();
+         return Data + valueBytes;
+     }
+     TValue& Value() {
+         TValue* const valuePtr = (TValue*)Data;
+         return *valuePtr;
+     }
+     const TValue& Value() const {
+         const TValue* const valuePtr = (const TValue*)Data;
+         return *valuePtr;
+     }
+
+     template <class K>
+     bool KeyEquals(const EqualKey& eq, const K& k) const {
+         const char* const stored = Data + SizeOfEx<TValue>();
+         return eq(stored, k);
+     }
+
+     // Record size: value bytes + key with its terminator, rounded up to the record alignment.
+     size_t GetLength() const {
+         const size_t unaligned = strlen(Data + SizeOfEx<TValue>()) + 1 + SizeOfEx<TValue>();
+         return AlignUp(unaligned, AlignForChrKey<TValue>());
+     }
+ };
+
+ // Record writer for C-string keys: the value bytes come first, then the
+ // NUL-terminated key, then padding up to AlignForChrKey<Value>().
+ template <class Value, typename size_type_o>
+ struct TSthashWriter<const char*, Value, size_type_o> {
+     typedef size_type_o TSizeType;
+
+     // Size of the serialized record: value + key with terminator, rounded up to
+     // the record alignment.
+     size_t GetRecordSize(const std::pair<const char*, const Value>& record) const {
+         size_t length = strlen(record.first) + 1 + SizeOfEx<Value>();
+         length = AlignUp(length, AlignForChrKey<Value>());
+         return length;
+     }
+
+     // Writes one record to `stream`; always returns 0.
+     int SaveRecord(IOutputStream* stream, const std::pair<const char*, const Value>& record) const {
+         const char* alignBuffer = "qqqq"; // filler bytes; the padding never exceeds 3 bytes
+         stream->Write(&record.second, SizeOfEx<Value>());
+         const size_t keyLength = strlen(record.first) + 1;
+         stream->Write(record.first, keyLength);
+         // Pad the whole record (value + key), exactly as GetRecordSize() and the
+         // reading iterator compute it. Padding by the key length alone writes a
+         // different number of bytes whenever the value size is not a multiple of
+         // the alignment. For value sizes that are a multiple of it, the result is
+         // unchanged, so existing files stay compatible.
+         const size_t padding = AlignUpSpace(keyLength + SizeOfEx<Value>(), AlignForChrKey<Value>());
+         if (padding)
+             stream->Write(alignBuffer, padding);
+         return 0;
+     }
+ };
+
+ // Read iterator for records with a fixed-size key and a C-string value: the key
+ // bytes are followed by the NUL-terminated value and padding to 4 bytes.
+ template <class TKey, class HashFcn, class EqualKey>
+ struct TSthashIterator<TKey, const char* const, HashFcn, EqualKey> {
+     typedef const TKey TKeyType;
+     typedef const char* TValueType;
+     typedef EqualKey TKeyEqualType;
+     typedef HashFcn THasherType;
+
+     // Address of the current record.
+     const char* Data;
+     TSthashIterator()
+         : Data(nullptr)
+     {
+     }
+     TSthashIterator(const char* data)
+         : Data(data)
+     {
+     }
+     // Advances to the next record.
+     void operator++() {
+         Data += GetLength();
+     }
+
+     bool operator!=(const TSthashIterator& that) const {
+         return Data != that.Data;
+     }
+     bool operator==(const TSthashIterator& that) const {
+         return Data == that.Data;
+     }
+     // const-qualified like Key() in the other TSthashIterator specializations,
+     // so the key is reachable through a const iterator as well.
+     TKey& Key() const {
+         return *(TKey*)Data;
+     }
+     // The value string starts right after the key bytes.
+     const char* Value() const {
+         return Data + sizeof(TKey);
+     }
+
+     template <class K>
+     bool KeyEquals(const EqualKey& eq, const K& k) const {
+         return eq(*(TKey*)Data, k);
+     }
+
+     // Record size: key bytes + value with its terminator, rounded up to 4 bytes.
+     size_t GetLength() const {
+         size_t length = strlen(Data + sizeof(TKey)) + 1 + sizeof(TKey);
+         length = AlignUp(length, (size_t)4);
+         return length;
+     }
+ };
+
+ // Record writer for a fixed-size key and a C-string value: the key bytes come
+ // first, then the NUL-terminated value, then padding to 4 bytes.
+ template <class Key, typename size_type_o>
+ struct TSthashWriter<Key, const char*, size_type_o> {
+     typedef size_type_o TSizeType;
+
+     // Size of the serialized record: key + value with terminator, rounded up to 4 bytes.
+     size_t GetRecordSize(const std::pair<const Key, const char*>& record) const {
+         size_t length = strlen(record.second) + 1 + sizeof(Key);
+         length = AlignUp(length, (size_t)4);
+         return length;
+     }
+
+     // Writes one record to `stream`; always returns 0.
+     int SaveRecord(IOutputStream* stream, const std::pair<const Key, const char*>& record) const {
+         const char* alignBuffer = "qqqq"; // filler bytes; the padding never exceeds 3 bytes
+         stream->Write(&record.first, sizeof(Key));
+         const size_t valueLength = strlen(record.second) + 1;
+         stream->Write(record.second, valueLength);
+         // Pad the whole record (key + value), exactly as GetRecordSize() and the
+         // reading iterator compute it. Padding by the string length alone writes a
+         // different number of bytes whenever sizeof(Key) is not a multiple of 4.
+         // For key sizes that are a multiple of 4 the result is unchanged.
+         const size_t padding = AlignUpSpace(valueLength + sizeof(Key), (size_t)4);
+         if (padding)
+             stream->Write(alignBuffer, padding);
+         return 0;
+     }
+ };
+
+ // Read iterator for records made of two consecutive NUL-terminated strings:
+ // the key followed by the value, without any padding.
+ template <class HashFcn, class EqualKey>
+ struct TSthashIterator<const char* const, const char* const, HashFcn, EqualKey> {
+     using TKeyType = const char*;
+     using TValueType = const char*;
+     using TKeyEqualType = EqualKey;
+     using THasherType = HashFcn;
+
+     // Address of the current record.
+     const char* Data;
+
+     TSthashIterator()
+         : Data(nullptr)
+     {
+     }
+     TSthashIterator(const char* data)
+         : Data(data)
+     {
+     }
+
+     // Advances to the next record.
+     void operator++() {
+         const size_t step = GetLength();
+         Data += step;
+     }
+
+     bool operator!=(const TSthashIterator& that) const {
+         return !(Data == that.Data);
+     }
+     bool operator==(const TSthashIterator& that) const {
+         return !(Data != that.Data);
+     }
+
+     const char* Key() const {
+         return Data;
+     }
+     // The value starts right after the key's terminator.
+     const char* Value() const {
+         const size_t keyBytes = strlen(Data) + 1;
+         return Data + keyBytes;
+     }
+
+     template <class K>
+     bool KeyEquals(const EqualKey& eq, const K& k) const {
+         return eq(Data, k);
+     }
+
+     // Record size: both strings including their terminators.
+     size_t GetLength() const {
+         const size_t keyBytes = strlen(Data) + 1;
+         const size_t valueBytes = strlen(Data + keyBytes) + 1;
+         return keyBytes + valueBytes;
+     }
+ };
+
+ // Record writer for string-to-string maps: the NUL-terminated key is followed
+ // by the NUL-terminated value, without any padding.
+ template <typename size_type_o>
+ struct TSthashWriter<const char*, const char*, size_type_o> {
+     using TSizeType = size_type_o;
+
+     // Size of the serialized record: both strings plus their two terminators.
+     size_t GetRecordSize(const std::pair<const char*, const char*>& record) const {
+         return strlen(record.first) + strlen(record.second) + 2;
+     }
+
+     // Writes the key and then the value, each with its terminator; always returns 0.
+     int SaveRecord(IOutputStream* stream, const std::pair<const char*, const char*>& record) const {
+         const char* const key = record.first;
+         stream->Write(key, strlen(key) + 1);
+         const char* const value = record.second;
+         stream->Write(value, strlen(value) + 1);
+         return 0;
+     }
+ };
diff --git a/library/cpp/on_disk/st_hash/ya.make b/library/cpp/on_disk/st_hash/ya.make
new file mode 100644
index 0000000000..8c6d05711c
--- /dev/null
+++ b/library/cpp/on_disk/st_hash/ya.make
@@ -0,0 +1,15 @@
+LIBRARY()
+
+SRCS(
+ fake.cpp
+ save_stl.h
+ static_hash.h
+ static_hash_map.h
+ sthash_iterators.h
+)
+
+PEERDIR(
+ library/cpp/deprecated/mapped_file
+)
+
+END()
diff --git a/library/cpp/pybind/attr.h b/library/cpp/pybind/attr.h
new file mode 100644
index 0000000000..5f25a6d73d
--- /dev/null
+++ b/library/cpp/pybind/attr.h
@@ -0,0 +1,412 @@
+#pragma once
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <util/generic/string.h>
+#include <util/generic/map.h>
+#include <util/generic/set.h>
+#include <util/generic/vector.h>
+#include <util/generic/ptr.h>
+
+#include "cast.h"
+#include "exceptions.h"
+
+namespace NPyBind {
+ // TBaseAttrGetter
+ // Interface for reading one Python-visible attribute from a C++ object.
+ template <typename TObjType>
+ class TBaseAttrGetter {
+ public:
+     virtual ~TBaseAttrGetter() {
+     }
+
+     // Produces the attribute value in `res`; returns false when this getter
+     // cannot supply it.
+     virtual bool GetAttr(PyObject* owner, const TObjType& self, const TString& attr, PyObject*& res) const = 0;
+
+     // Default presence check: hidden names are never reported; otherwise the
+     // attribute is present when GetAttr succeeds. The probed value is released.
+     virtual bool HasAttr(PyObject* owner, const TObjType& self, const TString& attr, const TSet<TString>& hiddenNames) const {
+         const bool hidden = hiddenNames.find(attr) != hiddenNames.end();
+         if (hidden) {
+             return false;
+         }
+         PyObject* probe = nullptr;
+         const bool found = GetAttr(owner, self, attr, probe);
+         if (found) {
+             Py_XDECREF(probe);
+         }
+         return found;
+     }
+ };
+
+ // Interface for writing one Python-visible attribute into a C++ object.
+ template <typename TObjType>
+ class TBaseAttrSetter {
+ public:
+     virtual ~TBaseAttrSetter() = default;
+
+     // Stores `val` as attribute `attr` of `self`; returns false when this
+     // setter could not handle the assignment.
+     virtual bool SetAttr(PyObject* owner, TObjType& self, const TString& attr, PyObject* val) = 0;
+ };
+
+ // Registry of attribute getters keyed by attribute name. Several getters
+ // may be registered under one name; they are tried in registration order.
+ // Getters registered under the empty name act as a fallback in GetAttr.
+ template <typename TObjType>
+ class TAttrGetters {
+ public:
+     using TGetterPtr = TSimpleSharedPtr<TBaseAttrGetter<TObjType>>;
+
+ private:
+     using TGetterList = TVector<TGetterPtr>;
+     using TGetterMap = TMap<TString, TGetterList>;
+
+     // Held by reference: the set must outlive this registry.
+     const TSet<TString>& HiddenAttrNames;
+     TGetterMap Getters;
+
+ public:
+     TAttrGetters(const TSet<TString>& hiddenNames)
+         : HiddenAttrNames(hiddenNames)
+     {
+     }
+
+     void AddGetter(const TString& attr, TGetterPtr getter) {
+         Getters[attr].push_back(getter);
+     }
+
+     // Returns the value from the first getter that succeeds, or nullptr.
+     PyObject* GetAttr(PyObject* owner, const TObjType& self, const TString& attr) const {
+         typename TGetterMap::const_iterator found = Getters.find(attr);
+         if (found == Getters.end()) {
+             found = Getters.find("");
+         }
+         if (found == Getters.end()) {
+             return nullptr;
+         }
+         for (const TGetterPtr& getter : found->second) {
+             PyObject* value = nullptr;
+             if (getter->GetAttr(owner, self, attr, value)) {
+                 return value;
+             }
+             // IMPORTANT!
+             // A getter that failed with a Python error pending must end the search:
+             // trying further getters would raise follow-up exceptions inside
+             // Py_BuildValue, and the original exception has to be preserved.
+             if (PyErr_Occurred()) {
+                 return nullptr;
+             }
+         }
+         return nullptr;
+     }
+
+     // True when any getter registered under exactly `attr` reports the
+     // attribute; the empty-name fallback is not consulted here.
+     bool HasAttr(PyObject* owner, const TObjType& self, const TString& attr) const {
+         const typename TGetterMap::const_iterator found = Getters.find(attr);
+         if (found == Getters.end()) {
+             return false;
+         }
+         for (const TGetterPtr& getter : found->second) {
+             if (getter->HasAttr(owner, self, attr, HiddenAttrNames)) {
+                 return true;
+             }
+         }
+         return false;
+     }
+
+     // Collects every readable attribute into `res`; attributes that fail
+     // with a C++ exception or a Python error are skipped.
+     void GetAttrsDictionary(PyObject* owner, const TObjType& self, TMap<TString, PyObject*>& res) const {
+         for (const auto& entry : Getters) {
+             const TString& name = entry.first;
+             try {
+                 if (!HasAttr(owner, self, name)) {
+                     continue;
+                 }
+                 PyObject* value = GetAttr(owner, self, name);
+                 if (value) {
+                     res[name] = value;
+                 }
+                 if (PyErr_Occurred()) {
+                     PyErr_Clear(); // Skip python errors as well
+                 }
+             } catch (const std::exception&) {
+                 // ignore this field
+             }
+         }
+     }
+
+     // Appends the names of all attributes for which HasAttr is true.
+     void GetAttrsNames(PyObject* owner, const TObjType& self, TVector<TString>& resultNames) const {
+         for (const auto& entry : Getters) {
+             if (HasAttr(owner, self, entry.first)) {
+                 resultNames.push_back(entry.first);
+             }
+         }
+     }
+ };
+
+ // Getter that delegates to PyObject_GenericGetAttr on the owning Python
+ // object, always using the attribute name fixed at construction.
+ template <typename TObjType>
+ class TGenericAttrGetter: public TBaseAttrGetter<TObjType> {
+ private:
+     TString AttrName;
+
+ public:
+     TGenericAttrGetter(const TString& attrName)
+         : AttrName(attrName)
+     {
+     }
+
+     // Throws TPyErr(AttributeError) when the lookup fails without Python
+     // having set an error of its own.
+     bool GetAttr(PyObject* obj, const TObjType&, const TString&, PyObject*& res) const override {
+         auto pyName = NameFromString(AttrName);
+         res = PyObject_GenericGetAttr(obj, pyName.Get());
+         if (res != nullptr) {
+             return true;
+         }
+         if (!PyErr_Occurred())
+             ythrow TPyErr(PyExc_AttributeError) << "Can't get generic attribute '" << AttrName << "'";
+         return false;
+     }
+ };
+
+ // Registry of attribute setters keyed by attribute name; setters registered
+ // under the empty name are used when no exact match exists.
+ template <typename TObjType>
+ class TAttrSetters {
+ private:
+     using TSetterPtr = TSimpleSharedPtr<TBaseAttrSetter<TObjType>>;
+     using TSetterList = TVector<TSetterPtr>;
+     using TSetterMap = TMap<TString, TSetterList>;
+
+     TSetterMap Setters;
+
+ public:
+     void AddSetter(const TString& attr, TSetterPtr setter) {
+         Setters[attr].push_back(setter);
+     }
+
+     // Tries the registered setters in order; true as soon as one accepts.
+     bool SetAttr(PyObject* owner, TObjType& self, const TString& attr, PyObject* val) {
+         typename TSetterMap::const_iterator found = Setters.find(attr);
+         if (found == Setters.end()) {
+             found = Setters.find("");
+         }
+         if (found == Setters.end()) {
+             return false;
+         }
+         for (const TSetterPtr& setter : found->second) {
+             if (setter->SetAttr(owner, self, attr, val)) {
+                 return true;
+             }
+         }
+         return false;
+     }
+
+     // Best-effort bulk assignment: individual failures (including C++
+     // exceptions) are ignored and the call always returns true.
+     bool SetAttrDictionary(PyObject* owner, TObjType& self, TMap<TString, PyObject*>& dict) {
+         for (const auto& item : dict) {
+             try {
+                 SetAttr(owner, self, item.first, item.second);
+             } catch (std::exception&) {
+                 // ignore this field
+             }
+         }
+
+         return true;
+     }
+ };
+
+ /**
+ * TMethodAttrGetter - maps a Python attribute read to a call of a const,
+ * argument-less C++ member function of TSubObject.
+ */
+ template <typename TObjType, typename TResult, typename TSubObject>
+ class TMethodAttrGetter: public TBaseAttrGetter<TObjType> {
+ private:
+     using TMethod = TResult (TSubObject::*)() const;
+     TMethod Method;
+
+ public:
+     TMethodAttrGetter(TMethod method)
+         : Method(method)
+     {
+     }
+
+     // Fails when `self` is not (dynamically) a TSubObject or when the
+     // result could not be converted to a Python object.
+     bool GetAttr(PyObject*, const TObjType& self, const TString&, PyObject*& res) const override {
+         if (const TSubObject* sub = dynamic_cast<const TSubObject*>(&self)) {
+             res = BuildPyObject((sub->*Method)());
+             return res != nullptr;
+         }
+         return false;
+     }
+ };
+
+ // Getter that computes the attribute by invoking a stored functor on the object.
+ template <typename TObjType, typename TFunctor>
+ class TFunctorAttrGetter: public TBaseAttrGetter<TObjType> {
+     TFunctor Functor;
+
+ public:
+     explicit TFunctorAttrGetter(TFunctor functor)
+         : Functor(functor)
+     {
+     }
+
+     // Succeeds when the functor's result converts to a Python object.
+     bool GetAttr(PyObject*, const TObjType& self, const TString&, PyObject*& res) const override {
+         PyObject* const built = BuildPyObject(Functor(self));
+         res = built;
+         return built != nullptr;
+     }
+ };
+
+
+ /**
+ * TMethodAttrGetterWithCheck - maps a Python attribute read to a pair of
+ * C++ member functions: a checker and a getter.
+ * If the checker returns false, None is returned.
+ * Otherwise the getter is called and its result converted.
+ */
+ template <typename TObjType, typename TResult, typename TSubObject>
+ class TMethodAttrGetterWithCheck: public TBaseAttrGetter<TObjType> {
+ private:
+     using TMethod = TResult (TSubObject::*)() const;
+     using TCheckerMethod = bool (TSubObject::*)() const;
+     TMethod Method;
+     TCheckerMethod CheckerMethod;
+
+ public:
+     TMethodAttrGetterWithCheck(TMethod method, TCheckerMethod checkerMethod)
+         : Method(method)
+         , CheckerMethod(checkerMethod)
+     {
+     }
+
+     bool GetAttr(PyObject*, const TObjType& self, const TString&, PyObject*& res) const override {
+         const TSubObject* sub = dynamic_cast<const TSubObject*>(&self);
+         if (sub == nullptr) {
+             return false;
+         }
+         const bool hasValue = (sub->*CheckerMethod)();
+         if (!hasValue) {
+             // Absent value: hand out a new reference to None.
+             Py_INCREF(Py_None);
+             res = Py_None;
+             return (res != nullptr);
+         }
+         res = BuildPyObject((sub->*Method)());
+         return (res != nullptr);
+     }
+ };
+
+ // Getter that calls a const member function and passes its result through
+ // a mapper before converting it to a Python object.
+ template <typename TObjType, typename TResult, typename TSubObject, typename TMapper>
+ class TMethodAttrMappingGetter: public TBaseAttrGetter<TObjType> {
+ private:
+     using TMethod = TResult (TSubObject::*)() const;
+
+     TMethod Method;
+     TMapper Mapper;
+
+ public:
+     TMethodAttrMappingGetter(TMethod method, TMapper mapper)
+         : Method(method)
+         , Mapper(mapper)
+     {
+     }
+
+     // Fails when `self` is not a TSubObject or the mapped value cannot be converted.
+     bool GetAttr(PyObject*, const TObjType& self, const TString&, PyObject*& res) const override {
+         if (const TSubObject* sub = dynamic_cast<const TSubObject*>(&self)) {
+             res = BuildPyObject(Mapper((sub->*Method)()));
+             return res != nullptr;
+         }
+         return false;
+     }
+ };
+
+ // Factory: wraps `method` and `mapper` into a shared TMethodAttrMappingGetter.
+ template <typename TObjType, typename TResult, typename TSubObject, typename TMapper>
+ TSimpleSharedPtr<TBaseAttrGetter<TObjType>>
+ CreateMethodAttrMappingGetter(TResult (TSubObject::*method)() const,
+                               TMapper mapper) {
+     using TGetter = TMethodAttrMappingGetter<TObjType, TResult, TSubObject, TMapper>;
+     return new TGetter(method, mapper);
+ }
+
+ // Setter that converts the Python value to TValue and passes it to a
+ // member function of TSubObject taking `TValue&`.
+ template <typename TObjType, typename TResult, typename TValue, typename TSubObject>
+ class TMethodAttrSetter: public TBaseAttrSetter<TObjType> {
+ private:
+     typedef TResult (TSubObject::*TMethod)(TValue&);
+     TMethod Method;
+
+ public:
+     TMethodAttrSetter(TMethod method)
+         : Method(method)
+     {
+     }
+
+     // Marked `override` (like the other setters in this file) so that any
+     // drift from the base signature is rejected at compile time.
+     // Returns false when `self` is not a TSubObject or the value cannot be
+     // converted; the member function's own result is ignored.
+     bool SetAttr(PyObject*, TObjType& self, const TString&, PyObject* val) override {
+         TSubObject* sub = dynamic_cast<TSubObject*>(&self);
+         if (sub == nullptr)
+             return false;
+         TValue value;
+         if (!FromPyObject(val, value))
+             return false;
+         (sub->*Method)(value);
+         return true;
+     }
+ };
+
+ // Setter that converts the Python value to TValue and hands it, together
+ // with the object, to a stored functor.
+ template <typename TObjType, typename TValue, typename TFunctor>
+ class TFunctorAttrSetter: public TBaseAttrSetter<TObjType> {
+     TFunctor Functor;
+ public:
+     explicit TFunctorAttrSetter(TFunctor functor)
+         : Functor(functor)
+     {
+     }
+
+     // TBaseAttrSetter::SetAttr is not const-qualified, so this override must
+     // not be either: a `const override` matches no base virtual and makes the
+     // class fail to compile as soon as it is instantiated.
+     // Returns false when the value cannot be converted or when the functor's
+     // result cannot be turned into a Python object.
+     bool SetAttr(PyObject*, TObjType& self, const TString&, PyObject* val) override {
+         TValue value;
+         if (!FromPyObject(val, value))
+             return false;
+         // The converted result is only used as a success indicator; hold it in
+         // a smart pointer so the new reference is released instead of leaked.
+         TPyObjectPtr res(BuildPyObject(Functor(self, value)), true);
+         return (res.Get() != nullptr);
+     }
+ };
+ // Factory: getter backed by a const member function.
+ template <typename TObjType, typename TResult, typename TSubObject>
+ TSimpleSharedPtr<TBaseAttrGetter<TObjType>> CreateMethodAttrGetter(TResult (TSubObject::*method)() const) {
+     using TGetter = TMethodAttrGetter<TObjType, TResult, TSubObject>;
+     return new TGetter(method);
+ }
+
+ // Factory: getter backed by a functor invoked with the object.
+ template <typename TObjType, typename TFunctor>
+ TSimpleSharedPtr<TFunctorAttrGetter<TObjType, TFunctor>> CreateFunctorAttrGetter(TFunctor functor) {
+     using TGetter = TFunctorAttrGetter<TObjType, TFunctor>;
+     return MakeSimpleShared<TGetter>(functor);
+ }
+
+ // Factory: getter that yields None unless `checkerMethod` returns true.
+ template <typename TObjType, typename TResult, typename TSubObject>
+ TSimpleSharedPtr<TBaseAttrGetter<TObjType>> CreateMethodAttrGetterWithCheck(
+     TResult (TSubObject::*method)() const,
+     bool (TSubObject::*checkerMethod)() const) {
+     using TGetter = TMethodAttrGetterWithCheck<TObjType, TResult, TSubObject>;
+     return new TGetter(method, checkerMethod);
+ }
+
+ // Factory: setter backed by a member function taking `TValue&`.
+ template <typename TObjType, typename TResult, typename TValue, typename TSubObject>
+ TSimpleSharedPtr<TBaseAttrSetter<TObjType>> CreateMethodAttrSetter(TResult (TSubObject::*method)(TValue&)) {
+     using TSetter = TMethodAttrSetter<TObjType, TResult, TValue, TSubObject>;
+     return new TSetter(method);
+ }
+
+ // Factory: setter backed by a functor invoked with the object and the value.
+ template <typename TObjType, typename TFunctor, typename TValue>
+ TSimpleSharedPtr<TFunctorAttrSetter<TObjType, TValue, TFunctor>> CreateFunctorAttrSetter(TFunctor functor) {
+     using TSetter = TFunctorAttrSetter<TObjType, TValue, TFunctor>;
+     return MakeSimpleShared<TSetter>(functor);
+ }
+
+ // Setter that converts the Python value straight into a data member of TSubObject.
+ template <typename TObjType, typename TValue, typename TSubObject>
+ class TDirectAttrSetter: public TBaseAttrSetter<TObjType> {
+ private:
+     using TValueType = TValue TSubObject::*;
+     TValueType Value;
+
+ public:
+     TDirectAttrSetter(TValueType value)
+         : Value(value)
+     {
+     }
+
+     // Fails when `self` is not a TSubObject or the conversion is rejected.
+     bool SetAttr(PyObject*, TObjType& self, const TString&, PyObject* val) override {
+         TSubObject* sub = dynamic_cast<TSubObject*>(&self);
+         if (sub == nullptr) {
+             return false;
+         }
+         return FromPyObject(val, sub->*Value);
+     }
+ };
+
+ // Factory: setter that writes directly into the given data member.
+ template <typename TObjType, typename TValue, typename TSubObject>
+ TSimpleSharedPtr<TBaseAttrSetter<TObjType>> CreateAttrSetter(TValue TSubObject::*value) {
+     using TSetter = TDirectAttrSetter<TObjType, TValue, TSubObject>;
+     return new TSetter(value);
+ }
+
+ // Getter that converts a data member of TSubObject to a Python object.
+ template <typename TObjType, typename TValue, typename TSubObject>
+ class TDirectAttrGetter: public TBaseAttrGetter<TObjType> {
+ private:
+     using TValueType = TValue TSubObject::*;
+     TValueType Value;
+
+ public:
+     TDirectAttrGetter(TValueType value)
+         : Value(value)
+     {
+     }
+
+     // Fails when `self` is not a TSubObject or the member cannot be converted.
+     bool GetAttr(PyObject*, const TObjType& self, const TString&, PyObject*& res) const override {
+         if (const TSubObject* sub = dynamic_cast<const TSubObject*>(&self)) {
+             res = BuildPyObject(sub->*Value);
+             return res != nullptr;
+         }
+         return false;
+     }
+ };
+
+ // Factory: getter that reads directly from the given data member.
+ template <typename TObjType, typename TValue, typename TSubObject>
+ TSimpleSharedPtr<TBaseAttrGetter<TObjType>> CreateAttrGetter(TValue TSubObject::*value) {
+     using TGetter = TDirectAttrGetter<TObjType, TValue, TSubObject>;
+     return new TGetter(value);
+ }
+}
diff --git a/library/cpp/pybind/cast.cpp b/library/cpp/pybind/cast.cpp
new file mode 100644
index 0000000000..60a44b0e83
--- /dev/null
+++ b/library/cpp/pybind/cast.cpp
@@ -0,0 +1,324 @@
+#include "cast.h"
+#include <util/generic/yexception.h>
+#include <util/generic/buffer.h>
+
+namespace NPyBind {
+ // Returns Py_True; when `incref` is set the caller receives a new reference.
+ PyObject* GetTrueRef(bool incref) {
+     PyObject* const result = Py_True;
+     if (incref) {
+         Py_INCREF(result);
+     }
+     return result;
+ }
+
+ // Returns Py_False; when `incref` is set the caller receives a new reference.
+ PyObject* GetFalseRef(bool incref) {
+     PyObject* const result = Py_False;
+     if (incref) {
+         Py_INCREF(result);
+     }
+     return result;
+ }
+
+ // Numeric overloads: each one forwards its argument to Py_BuildValue with
+ // the format code that corresponds to the C type of the parameter, and
+ // returns whatever Py_BuildValue returns.
+ PyObject* BuildPyObject(int val) {
+ return Py_BuildValue("i", val);
+ }
+
+ PyObject* BuildPyObject(unsigned int val) {
+ return Py_BuildValue("I", val);
+ }
+
+ PyObject* BuildPyObject(long int val) {
+ return Py_BuildValue("l", val);
+ }
+
+ PyObject* BuildPyObject(unsigned long int val) {
+ return Py_BuildValue("k", val);
+ }
+
+ // The long long overloads exist only when Python.h provides PY_LONG_LONG.
+#ifdef PY_LONG_LONG
+ PyObject* BuildPyObject(PY_LONG_LONG val) {
+ return Py_BuildValue("L", val);
+ }
+
+ PyObject* BuildPyObject(unsigned PY_LONG_LONG val) {
+ return Py_BuildValue("K", val);
+ }
+#endif
+
+ PyObject* BuildPyObject(float val) {
+ return Py_BuildValue("f", val);
+ }
+
+ PyObject* BuildPyObject(double val) {
+ return Py_BuildValue("d", val);
+ }
+
+ // Converts a string view to a Python object. An uninitialized view maps to
+ // None. Otherwise the "s#" format is tried first; if that raised
+ // UnicodeDecodeError the error is cleared and the data is built with "y#"
+ // instead. Any other failure returns nullptr with the Python error left set.
+ PyObject* BuildPyObject(const TStringBuf& val) {
+     if (!val.IsInited())
+         Py_RETURN_NONE;
+
+     // PY_SSIZE_T_CLEAN is defined before Python.h is included (cast.h), so
+     // the length consumed by the "#" formats is a Py_ssize_t. Passing an int
+     // through varargs is undefined where the two types differ in size.
+     const Py_ssize_t length = static_cast<Py_ssize_t>(val.length());
+
+     PyObject* stringValue = Py_BuildValue("s#", val.data(), length);
+     if (stringValue != nullptr) {
+         return stringValue;
+     }
+     if (!PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+         return nullptr;
+     }
+     PyErr_Clear();
+     return Py_BuildValue("y#", val.data(), length);
+ }
+
+ // Converts a zero-terminated C string to a Python object. A null pointer
+ // maps to None. Otherwise the "s#" format is tried first; if that raised
+ // UnicodeDecodeError the error is cleared and the data is built with "y#"
+ // instead. Any other failure returns nullptr with the Python error left set.
+ PyObject* BuildPyObject(const char* val) {
+     if (val == nullptr)
+         Py_RETURN_NONE;
+
+     // PY_SSIZE_T_CLEAN is defined before Python.h is included (cast.h), so
+     // the length consumed by the "#" formats is a Py_ssize_t. Passing an int
+     // through varargs is undefined where the two types differ in size.
+     const Py_ssize_t length = static_cast<Py_ssize_t>(strlen(val));
+
+     PyObject* stringValue = Py_BuildValue("s#", val, length);
+     if (stringValue != nullptr) {
+         return stringValue;
+     }
+     if (!PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+         return nullptr;
+     }
+     PyErr_Clear();
+     return Py_BuildValue("y#", val, length);
+ }
+
+ // Builds a Python unicode object from a wide-character string view.
+ // An uninitialized view yields None. The construction path depends on the
+ // Python version the code is compiled against.
+ PyObject* BuildPyObject(const TWtringBuf& val) {
+ if (!val.IsInited())
+ Py_RETURN_NONE;
+#if PY_VERSION_HEX < 0x03030000
+ // Older API: allocate an uninitialized unicode object and fill it in place.
+ // NOTE(review): the PyUnicode_FromUnicode result is not checked for null
+ // before PyUnicode_AS_UNICODE is applied to it — confirm this is safe.
+ TPyObjectPtr result(PyUnicode_FromUnicode(nullptr, val.size()), true);
+ Py_UNICODE* buf = PyUnicode_AS_UNICODE(result.Get());
+ if (buf == nullptr)
+ Py_RETURN_NONE;
+ for (size_t i = 0; i < val.size(); ++i) {
+ buf[i] = static_cast<Py_UNICODE>(val[i]);
+ }
+#else
+ // Newer API: create the object directly from the 2-byte code units.
+ // NOTE(review): on failure None is returned instead of nullptr, so a Python
+ // error may still be pending — confirm callers expect this.
+ PyObject* unicodeValue = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, val.data(), val.size());
+ if (unicodeValue == nullptr)
+ Py_RETURN_NONE;
+ TPyObjectPtr result(unicodeValue, true);
+#endif
+ return result.RefGet();
+ }
+
+ // Converts a buffer to a Python list with one element per byte (each
+ // element is produced by the integral BuildPyObject overload selected for
+ // `char`). Returns nullptr, with the Python error left set, if the list or
+ // any element cannot be created.
+ PyObject* BuildPyObject(const TBuffer& val) {
+     TPyObjectPtr res(PyList_New(val.size()), true);
+     if (!res) {
+         // PyList_SetItem must not be called on a null list.
+         return nullptr;
+     }
+     for (size_t i = 0, size = val.Size(); i < size; ++i) {
+         PyObject* item = BuildPyObject(val.Data()[i]);
+         if (item == nullptr) {
+             // Do not hand out a list with empty slots; `res` releases it.
+             return nullptr;
+         }
+         // PyList_SetItem takes over the reference to `item`.
+         PyList_SetItem(res.Get(), i, item);
+     }
+     return res.RefGet();
+ }
+
+ // Returns a new reference to Py_True or Py_False.
+ PyObject* BuildPyObject(bool val) {
+     PyObject* const result = val ? Py_True : Py_False;
+     Py_INCREF(result);
+     return result;
+ }
+
+ // Pass-through for an existing Python object: the reference count is
+ // incremented (null is tolerated) and the same pointer is returned.
+ PyObject* BuildPyObject(PyObject* val) {
+ Py_XINCREF(val);
+ return val;
+ }
+
+ // Pass-through for a smart pointer: returns the result of RefGet().
+ // NOTE(review): RefGet() presumably hands out an additional reference owned
+ // by the caller — confirm against ptr.h.
+ PyObject* BuildPyObject(TPyObjectPtr ptr) {
+ return ptr.RefGet();
+ }
+
+ /* python represents (http://docs.python.org/c-api/arg.html#Py_BuildValue)
+ * char, uchar, short, ushort, int, long as PyInt
+ * uint, ulong as PyInt or PyLong (if exceeds sys.maxint)
+ * longlong, ulonglong as PyLong
+ */
+
+ // Converts a Python integer or float to a C long (floats are truncated by
+ // the cast). Returns false when the object is of another type or when the
+ // integer conversion fails, e.g. because the value does not fit.
+ template <>
+ bool FromPyObject(PyObject* obj, long& res) {
+     if (PyLong_Check(obj)) {
+         res = PyLong_AsLong(obj);
+         // -1 is also a valid value, so failure is recognised by -1 combined
+         // with a pending Python error (the idiom used by the long long
+         // conversions in this file).
+         return -1 != res || !PyErr_Occurred();
+     }
+     if (PyFloat_Check(obj)) {
+         res = static_cast<long>(PyFloat_AsDouble(obj));
+         return true;
+     }
+#if PY_MAJOR_VERSION < 3
+     res = PyInt_AsLong(obj);
+     return -1 != res || !PyErr_Occurred();
+#else
+     // Nothing was converted: report failure instead of inspecting `res`,
+     // which callers typically pass in uninitialized.
+     return false;
+#endif
+ }
+
+ // Converts via the signed long conversion and rejects negative values.
+ template <>
+ bool FromPyObject(PyObject* obj, unsigned long& res) {
+     long wide;
+     const bool converted = FromPyObject(obj, wide);
+     if (!converted || wide < 0) {
+         return false;
+     }
+     res = static_cast<unsigned long>(wide);
+     return true;
+ }
+
+ // Narrow integer conversions: each one converts through a wider type and
+ // then casts, without a range check on the narrowing step.
+ template <>
+ bool FromPyObject(PyObject* obj, int& res) {
+     long wide;
+     const bool converted = FromPyObject(obj, wide);
+     if (converted) {
+         res = static_cast<int>(wide);
+     }
+     return converted;
+ }
+
+ template <>
+ bool FromPyObject(PyObject* obj, unsigned char& res) {
+     long wide;
+     const bool converted = FromPyObject(obj, wide);
+     if (converted) {
+         res = static_cast<unsigned char>(wide);
+     }
+     return converted;
+ }
+
+ template <>
+ bool FromPyObject(PyObject* obj, char& res) {
+     long wide;
+     const bool converted = FromPyObject(obj, wide);
+     if (converted) {
+         res = static_cast<char>(wide);
+     }
+     return converted;
+ }
+
+ // Goes through unsigned long, so negative inputs are rejected there.
+ template <>
+ bool FromPyObject(PyObject* obj, unsigned int& res) {
+     unsigned long wide;
+     const bool converted = FromPyObject(obj, wide);
+     if (converted) {
+         res = static_cast<unsigned int>(wide);
+     }
+     return converted;
+ }
+
+#ifdef HAVE_LONG_LONG
+ // Python integers are converted directly; anything else goes through the
+ // `long` conversion and is widened afterwards.
+ template <>
+ bool FromPyObject(PyObject* obj, long long& res) {
+     if (!PyLong_Check(obj)) {
+         long narrow;
+         if (!FromPyObject(obj, narrow)) {
+             return false;
+         }
+         res = static_cast<long long>(narrow);
+         return true;
+     }
+     res = PyLong_AsLongLong(obj);
+     // -1 signals failure only when a Python error is pending.
+     return -1 != res || !PyErr_Occurred();
+ }
+
+ // Same scheme for the unsigned variant; the failure marker is the all-ones value.
+ template <>
+ bool FromPyObject(PyObject* obj, unsigned long long& res) {
+     if (!PyLong_Check(obj)) {
+         long narrow;
+         if (!FromPyObject(obj, narrow)) {
+             return false;
+         }
+         res = static_cast<unsigned long long>(narrow);
+         return true;
+     }
+     res = PyLong_AsUnsignedLongLong(obj);
+     return static_cast<unsigned long long>(-1) != res || !PyErr_Occurred();
+ }
+#endif
+
+ // Python floats are read directly; other objects are converted as long long
+ // first and then widened to double.
+ template <>
+ bool FromPyObject(PyObject* obj, double& res) {
+     if (!PyFloat_Check(obj)) {
+         long long integral;
+         if (!FromPyObject(obj, integral)) {
+             return false;
+         }
+         res = static_cast<double>(integral);
+         return true;
+     }
+     res = PyFloat_AsDouble(obj);
+     return true;
+ }
+
+ // Converts as double and narrows the result.
+ template <>
+ bool FromPyObject(PyObject* obj, float& res) {
+     double wide;
+     const bool converted = FromPyObject(obj, wide);
+     if (converted) {
+         res = static_cast<float>(wide);
+     }
+     return converted;
+ }
+
+ // Accepts only genuine Python bool objects; no truthiness conversion is done.
+ template <>
+ bool FromPyObject(PyObject* obj, bool& res) {
+     if (PyBool_Check(obj)) {
+         res = (obj == Py_True);
+         return true;
+     }
+     return false;
+ }
+
+ // Identity conversion: stores the same object in `res` after incrementing
+ // its reference count (null is tolerated). Always succeeds.
+ template <>
+ bool FromPyObject(PyObject* obj, PyObject*& res) {
+ Py_XINCREF(obj);
+ res = obj;
+ return true;
+ }
+
+ // Identity conversion into a smart pointer. Always succeeds.
+ // NOTE(review): TPyObjectPtr is constructed here without the second `true`
+ // argument used elsewhere in this file; presumably this form takes its own
+ // reference to `obj` — confirm against ptr.h.
+ template <>
+ bool FromPyObject(PyObject* obj, TPyObjectPtr& res) {
+ res = TPyObjectPtr(obj);
+ return true;
+ }
+
+ // Extracts a string view from a Python str (Python 3 only, as UTF-8) or
+ // bytes object. The view points into memory owned by `obj`.
+ // Returns false, with the Python error left set, when `obj` cannot be
+ // converted.
+ static inline bool _FromPyObject(PyObject* obj, TStringBuf& res) {
+#if PY_MAJOR_VERSION >= 3
+     if (PyUnicode_Check(obj)) {
+         Py_ssize_t utf8Len = 0;
+         auto utf8 = PyUnicode_AsUTF8AndSize(obj, &utf8Len);
+         if (utf8 == nullptr) {
+             // Encoding failed; do not build a view from a null pointer and a
+             // length that is not meaningful.
+             return false;
+         }
+         res = TStringBuf(utf8, utf8Len);
+         return true;
+     }
+#endif
+     char* str = nullptr;
+     Py_ssize_t len = 0;
+     if (-1 == PyBytes_AsStringAndSize(obj, &str, &len) || 0 > len)
+         return false;
+     res = TStringBuf(str, len);
+     return true;
+ }
+
+ // Non-owning conversion: the resulting view refers to data held by `obj`.
+ bool FromPyObject(PyObject* obj, TStringBuf& res) {
+     const bool converted = _FromPyObject(obj, res);
+     return converted;
+ }
+
+ // Owning conversion: the extracted characters are assigned to `res`.
+ bool FromPyObject(PyObject* obj, TString& res) {
+     TStringBuf view;
+     const bool converted = _FromPyObject(obj, view);
+     if (converted) {
+         res = view;
+     }
+     return converted;
+ }
+
+ // Converts a Python unicode object to a UTF-16 string by encoding it with
+ // PyUnicode_AsUTF16String and copying the payload. Returns false for
+ // non-unicode objects, when encoding fails, or when a Python error is
+ // pending after the copy.
+ bool FromPyObject(PyObject* obj, TUtf16String& res) {
+ if (!PyUnicode_Check(obj))
+ return false;
+ auto str = TPyObjectPtr(PyUnicode_AsUTF16String(obj), true);
+ if (!str)
+ return false;
+ // The first BOM_SIZE bytes of the encoded data are skipped.
+ // NOTE(review): this assumes the encoder always emits a 2-byte byte-order
+ // mark in native order — confirm against the CPython documentation.
+ constexpr auto BOM_SIZE = 2;
+ // Number of 2-byte code units that follow the skipped prefix.
+ size_t len = (static_cast<size_t>(PyBytes_GET_SIZE(str.Get())) - BOM_SIZE) / 2;
+ res.resize(len);
+ memcpy(res.begin(), PyBytes_AS_STRING(str.Get()) + BOM_SIZE, len * 2);
+ return (nullptr == PyErr_Occurred());
+ }
+
+ // Appends the elements of a Python list to `res`, one byte per element.
+ // Stops and returns false at the first element that cannot be converted;
+ // bytes appended before that point remain in `res`.
+ bool FromPyObject(PyObject* obj, TBuffer& res) {
+     if (!PyList_Check(obj)) {
+         return false;
+     }
+     const size_t count = PyList_Size(obj);
+     res.Reserve(count);
+     for (size_t idx = 0; idx != count; ++idx) {
+         char byte = 0;
+         PyObject* const element = PyList_GET_ITEM(obj, idx);
+         const bool converted = FromPyObject(element, byte);
+         if (!converted) {
+             return false;
+         }
+         res.Append(byte);
+     }
+     return true;
+ }
+}
diff --git a/library/cpp/pybind/cast.h b/library/cpp/pybind/cast.h
new file mode 100644
index 0000000000..1f3d7d8366
--- /dev/null
+++ b/library/cpp/pybind/cast.h
@@ -0,0 +1,373 @@
+#pragma once
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/vector.h>
+#include <util/generic/set.h>
+#include <util/generic/yexception.h>
+#include <util/generic/hash.h>
+#include <util/generic/map.h>
+#include <util/generic/maybe.h>
+#include <utility>
+#include <initializer_list>
+#include "ptr.h"
+
+namespace NPyBind {
+ PyObject* GetTrueRef(bool incref = true);
+ PyObject* GetFalseRef(bool incref = true);
+
+ PyObject* BuildPyObject(int val);
+ PyObject* BuildPyObject(unsigned int val);
+ PyObject* BuildPyObject(long int val);
+ PyObject* BuildPyObject(unsigned long int val);
+#ifdef PY_LONG_LONG
+ PyObject* BuildPyObject(PY_LONG_LONG val);
+ PyObject* BuildPyObject(unsigned PY_LONG_LONG val);
+#endif
+ PyObject* BuildPyObject(float val);
+ PyObject* BuildPyObject(double val);
+ PyObject* BuildPyObject(const TStringBuf& val);
+ PyObject* BuildPyObject(const char* val);
+ PyObject* BuildPyObject(const TWtringBuf& val);
+ PyObject* BuildPyObject(const TBuffer& val);
+ PyObject* BuildPyObject(bool val);
+ PyObject* BuildPyObject(PyObject*);
+ PyObject* BuildPyObject(TPyObjectPtr);
+
+ template <typename T>
+ PyObject* BuildPyObject(const TVector<T>& val);
+
+ template <typename T>
+ PyObject* BuildPyObject(const TSet<T>& val);
+
+ template <typename TKey, typename TVal>
+ PyObject* BuildPyObject(const THashMap<TKey, TVal>& val);
+
+ // Converts a pair to a two-element Python list [first, second].
+ // Returns nullptr if either element cannot be converted.
+ template <typename T1, typename T2>
+ PyObject* BuildPyObject(const std::pair<T1, T2>& val) {
+     TPyObjectPtr first(BuildPyObject(val.first), true);
+     if (!first) {
+         return nullptr;
+     }
+     TPyObjectPtr second(BuildPyObject(val.second), true);
+     if (!second) {
+         return nullptr;
+     }
+     TPyObjectPtr list(PyList_New(2), true);
+     PyObject* const rawList = list.Get();
+     PyList_SetItem(rawList, 0, first.RefGet());
+     PyList_SetItem(rawList, 1, second.RefGet());
+     return list.RefGet();
+ }
+
+ // Converts a vector to a Python list of the same length; returns nullptr as
+ // soon as one element cannot be converted.
+ template <typename T>
+ PyObject* BuildPyObject(const TVector<T>& val) {
+     const size_t count = val.size();
+     TPyObjectPtr list(PyList_New(count), true);
+     for (size_t idx = 0; idx != count; ++idx) {
+         auto element = BuildPyObject(std::move(val[idx]));
+         if (!element) {
+             return nullptr;
+         }
+         // PyList_SetItem takes over the reference to `element`.
+         PyList_SetItem(list.Get(), idx, element);
+     }
+     return list.RefGet();
+ }
+
+ // Rvalue overload: elements are moved into their conversion calls.
+ // Returns nullptr as soon as one element cannot be converted.
+ template <typename T>
+ PyObject* BuildPyObject(TVector<T>&& val) {
+     const size_t count = val.size();
+     TPyObjectPtr list(PyList_New(count), true);
+     for (size_t idx = 0; idx != count; ++idx) {
+         auto element = BuildPyObject(std::move(val[idx]));
+         if (!element) {
+             return nullptr;
+         }
+         // PyList_SetItem takes over the reference to `element`.
+         PyList_SetItem(list.Get(), idx, element);
+     }
+     return list.RefGet();
+ }
+
+ // Converts an ordered set to a Python set. Returns nullptr, with the Python
+ // error left set, if the set cannot be created, an element cannot be
+ // converted, or an element cannot be inserted.
+ template <typename T>
+ PyObject* BuildPyObject(const TSet<T>& val) {
+     TPyObjectPtr res(PySet_New(nullptr), true);
+     if (!res) {
+         return nullptr;
+     }
+     for (const auto& v : val) {
+         // PySet_Add does not take over the caller's reference (unlike
+         // PyList_SetItem), so the element is held in a smart pointer that
+         // releases it once the set has its own reference.
+         TPyObjectPtr pythonVal(BuildPyObject(std::move(v)), true);
+         if (!pythonVal) {
+             return nullptr;
+         }
+         if (PySet_Add(res.Get(), pythonVal.Get()) != 0) {
+             return nullptr;
+         }
+     }
+     return res.RefGet();
+ }
+
+ // Converts a hash set to a Python set. Returns nullptr, with the Python
+ // error left set, if the set cannot be created, an element cannot be
+ // converted, or an element cannot be inserted.
+ template <typename T>
+ PyObject* BuildPyObject(const THashSet<T>& val) {
+     TPyObjectPtr res(PySet_New(nullptr), true);
+     if (!res) {
+         return nullptr;
+     }
+     for (const auto& v : val) {
+         // PySet_Add does not take over the caller's reference (unlike
+         // PyList_SetItem), so the element is held in a smart pointer that
+         // releases it once the set has its own reference.
+         TPyObjectPtr pythonVal(BuildPyObject(std::move(v)), true);
+         if (!pythonVal) {
+             return nullptr;
+         }
+         if (PySet_Add(res.Get(), pythonVal.Get()) != 0) {
+             return nullptr;
+         }
+     }
+     return res.RefGet();
+ }
+
+ // Converts a hash map to a Python dict. Returns nullptr, with the Python
+ // error left set, if the dict cannot be created, a key or value cannot be
+ // converted, or an entry cannot be inserted (e.g. an unhashable key).
+ template <typename TKey, typename TVal>
+ PyObject* BuildPyObject(const THashMap<TKey, TVal>& val) {
+     TPyObjectPtr res(PyDict_New(), true);
+     if (!res) {
+         return nullptr;
+     }
+     for (const auto& item : val) {
+         TPyObjectPtr k(BuildPyObject(item.first), true);
+         if (!k) {
+             return nullptr;
+         }
+         TPyObjectPtr v(BuildPyObject(item.second), true);
+         if (!v) {
+             return nullptr;
+         }
+         // PyDict_SetItem takes its own references; k and v release theirs.
+         if (PyDict_SetItem(res.Get(), k.Get(), v.Get()) != 0) {
+             return nullptr;
+         }
+     }
+     return res.RefGet();
+ }
+
+ // Converts an ordered map to a Python dict. Returns nullptr, with the
+ // Python error left set, if the dict cannot be created, a key or value
+ // cannot be converted, or an entry cannot be inserted (e.g. an unhashable
+ // key).
+ template <typename TKey, typename TVal>
+ PyObject* BuildPyObject(const TMap<TKey, TVal>& val) {
+     TPyObjectPtr res(PyDict_New(), true);
+     if (!res) {
+         return nullptr;
+     }
+     for (const auto& item : val) {
+         TPyObjectPtr k(BuildPyObject(item.first), true);
+         if (!k) {
+             return nullptr;
+         }
+         TPyObjectPtr v(BuildPyObject(item.second), true);
+         if (!v) {
+             return nullptr;
+         }
+         // PyDict_SetItem takes its own references; k and v release theirs.
+         if (PyDict_SetItem(res.Get(), k.Get(), v.Get()) != 0) {
+             return nullptr;
+         }
+     }
+     return res.RefGet();
+ }
+
+
+ // Converts a multimap to a Python dict whose values are lists: all values
+ // that share a key are collected into one list stored under that key.
+ // Returns nullptr if any key or value cannot be converted.
+ // NOTE(review): the grouping relies on the multimap yielding equal keys
+ // adjacently during iteration — confirm for TMultiMap.
+ template <typename TKey, typename TVal>
+ PyObject* BuildPyObject(const TMultiMap<TKey, TVal>& val) {
+ TPyObjectPtr res(PyDict_New(), true);
+ // Key of the group being accumulated; undefined before the first element.
+ TMaybe<TKey> prevKey;
+ // Values collected so far for prevKey.
+ TPyObjectPtr currentEntry(PyList_New(0), true);
+ for (const auto& [key, value]: val) {
+ // A new key starts: store the finished group and begin a fresh list.
+ if (prevKey.Defined() && prevKey != key) {
+ TPyObjectPtr pyPrevKey(BuildPyObject(*prevKey), true);
+ if (!pyPrevKey) {
+ return nullptr;
+ }
+ PyDict_SetItem(res.Get(), pyPrevKey.Get(), currentEntry.Get());
+ currentEntry = TPyObjectPtr(PyList_New(0), true);
+ }
+ TPyObjectPtr pyValue(BuildPyObject(value), true);
+ if (!pyValue) {
+ return nullptr;
+ }
+ PyList_Append(currentEntry.Get(), pyValue.Get());
+ prevKey = key;
+ }
+
+ // Store the last group, if the map was not empty.
+ if (prevKey.Defined()) {
+ TPyObjectPtr pyPrevKey(BuildPyObject(*prevKey), true);
+ if (!pyPrevKey) {
+ return nullptr;
+ }
+ PyDict_SetItem(res.Get(), pyPrevKey.Get(), currentEntry.Get());
+ }
+ return res.RefGet();
+ }
+
+ // An empty TMaybe becomes None; otherwise the contained value is converted.
+ template <typename T>
+ PyObject* BuildPyObject(const TMaybe<T>& val) {
+     if (val.Defined()) {
+         return BuildPyObject(val.GetRef());
+     }
+     Py_RETURN_NONE;
+ }
+
+ // A null shared pointer becomes None; otherwise the pointee is converted.
+ template <typename T, typename C, typename D>
+ PyObject* BuildPyObject(const TSharedPtr<T, C, D>& val) {
+     if (const T* pointee = val.Get()) {
+         return BuildPyObject(*pointee);
+     }
+     Py_RETURN_NONE;
+ }
+
+ template <typename T>
+ bool FromPyObject(PyObject* obj, T& res);
+
+ bool FromPyObject(PyObject* obj, TString& res);
+ bool FromPyObject(PyObject* obj, TStringBuf& res);
+ bool FromPyObject(PyObject* obj, TUtf16String& res);
+ bool FromPyObject(PyObject* obj, TBuffer& res);
+
+ // Converts a Python object to TMaybe<T>: a value convertible to T yields a
+ // defined TMaybe, None yields Nothing(), anything else fails. The
+ // conversion to T is attempted before the None check.
+ template <typename T>
+ bool FromPyObject(PyObject* obj, TMaybe<T>& res) {
+ //we need to save the current error before trying to deserialize the value
+ //because it can produce conversion errors in python that we don't need to handle
+ // The guard fetches the error indicator on construction and restores it in
+ // its destructor, i.e. on every return path of this function.
+ struct TError {
+ public:
+ TError() {
+ PyErr_Fetch(&Type, &Value, &Traceback);
+ }
+ ~TError() {
+ PyErr_Restore(Type, Value, Traceback);
+
+ }
+ private:
+ PyObject* Type = nullptr;
+ PyObject* Value = nullptr;
+ PyObject* Traceback = nullptr;
+ } currentPyExcInfo;
+ T val;
+ if (FromPyObject(obj, val)) {
+ res = val;
+ return true;
+ }
+ if (obj == Py_None) {
+ res = Nothing();
+ return true;
+ }
+ return false;
+ }
+
+ // Accepts a tuple or a list of exactly two elements and converts them into
+ // the pair's members, first then second.
+ template <typename T1, typename T2>
+ bool FromPyObject(PyObject* obj, std::pair<T1, T2>& res) {
+     PyObject* firstItem;
+     PyObject* secondItem;
+     const bool isPairTuple = PyTuple_Check(obj) && 2 == PyTuple_Size(obj);
+     if (isPairTuple) {
+         firstItem = PyTuple_GET_ITEM(obj, 0);
+         secondItem = PyTuple_GET_ITEM(obj, 1);
+     } else {
+         const bool isPairList = PyList_Check(obj) && 2 == PyList_Size(obj);
+         if (!isPairList) {
+             return false;
+         }
+         firstItem = PyList_GET_ITEM(obj, 0);
+         secondItem = PyList_GET_ITEM(obj, 1);
+     }
+     if (!FromPyObject(firstItem, res.first)) {
+         return false;
+     }
+     return FromPyObject(secondItem, res.second);
+ }
+
+ // Converts a Python list element by element. The vector is resized to the
+ // list length up front, so on failure it keeps that size with the remaining
+ // elements left as resize() created them.
+ template <typename T>
+ bool FromPyObject(PyObject* obj, TVector<T>& res) {
+     if (!PyList_Check(obj)) {
+         return false;
+     }
+     const size_t count = PyList_Size(obj);
+     res.resize(count);
+     for (size_t idx = 0; idx != count; ++idx) {
+         PyObject* const element = PyList_GET_ITEM(obj, idx);
+         const bool converted = FromPyObject(element, res[idx]);
+         if (!converted) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ // Converts a Python dict into a hash map. Entries are inserted one by one,
+ // so entries converted before a failure stay in `res`.
+ template <typename K, typename V>
+ bool FromPyObject(PyObject* obj, THashMap<K, V>& res) {
+     if (!PyDict_Check(obj)) {
+         return false;
+     }
+     TPyObjectPtr keys(PyDict_Keys(obj), true);
+     const size_t count = PyList_Size(keys.Get());
+     for (size_t idx = 0; idx != count; ++idx) {
+         PyObject* const pyKey = PyList_GET_ITEM(keys.Get(), idx);
+         PyObject* const pyValue = PyDict_GetItem(obj, pyKey);
+         K convertedKey;
+         V convertedValue;
+         const bool converted = FromPyObject(pyKey, convertedKey) && FromPyObject(pyValue, convertedValue);
+         if (!converted) {
+             return false;
+         }
+         res[convertedKey] = convertedValue;
+     }
+     return true;
+ }
+
+ // Converts a Python dict into an ordered map. Entries are inserted one by
+ // one, so entries converted before a failure stay in `res`.
+ template <typename K, typename V>
+ bool FromPyObject(PyObject* obj, TMap<K, V>& res) {
+     if (!PyDict_Check(obj)) {
+         return false;
+     }
+     TPyObjectPtr keys(PyDict_Keys(obj), true);
+     const size_t count = PyList_Size(keys.Get());
+     for (size_t idx = 0; idx != count; ++idx) {
+         PyObject* const pyKey = PyList_GET_ITEM(keys.Get(), idx);
+         PyObject* const pyValue = PyDict_GetItem(obj, pyKey);
+         K convertedKey;
+         V convertedValue;
+         const bool converted = FromPyObject(pyKey, convertedKey) && FromPyObject(pyValue, convertedValue);
+         if (!converted) {
+             return false;
+         }
+         res[convertedKey] = convertedValue;
+     }
+     return true;
+ }
+
+ // Thrown by the value-returning FromPyObject when the conversion fails.
+ class cast_exception: public TBadCastException {
+ };
+
+ // Value-returning conversion: throws cast_exception instead of reporting
+ // failure through a bool.
+ template <typename T>
+ T FromPyObject(PyObject* obj) {
+     T converted;
+     const bool ok = FromPyObject(obj, converted);
+     if (!ok)
+         ythrow cast_exception() << "Cannot cast argument to " << TypeName<T>();
+     return converted;
+ }
+
+ // Converts the elements of the Python tuple `args` into `outArgs`, one per
+ // position. Fails when `args` is null, is not a tuple, or its size differs
+ // from the number of output arguments.
+ template <class... Args, std::size_t... I>
+ bool ExtractArgs(std::index_sequence<I...>, PyObject* args, Args&... outArgs) {
+ if (!args || !PyTuple_Check(args) || PyTuple_Size(args) != sizeof...(Args))
+ return false;
+ bool res = true;
+ // The braced list evaluates its elements left to right; `res &&` skips the
+ // remaining conversions once one of them has failed.
+ (void)std::initializer_list<bool>{(res = res && NPyBind::FromPyObject(PyTuple_GET_ITEM(args, I), outArgs))...};
+ return res;
+ }
+
+ // Convenience overload that generates the index sequence for the caller.
+ template <class... Args>
+ bool ExtractArgs(PyObject* args, Args&... outArgs) {
+ return ExtractArgs(std::index_sequence_for<Args...>(), args, outArgs...);
+ }
+
+ // Parses positional/keyword arguments that are all optional and converts the
+ // ones that were supplied into `outArgs`; outputs for omitted arguments are
+ // left untouched.
+ template <class... Args, std::size_t... I>
+ bool ExtractOptionalArgs(std::index_sequence<I...>, PyObject* args, PyObject* kwargs, const char* keywords[], Args&... outArgs) {
+ // One slot per argument; a slot stays null when the argument is omitted.
+ PyObject* pargs[sizeof...(Args)] = {};
+ // Format string "|OO...O": '|' followed by one 'O' per argument.
+ static const char format[sizeof...(Args) + 2] = {'|', ((void)I, 'O')..., 0};
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, format, const_cast<char**>(keywords), &pargs[I]...))
+ return false;
+ bool res = true;
+ // Convert only the supplied arguments; stop converting after the first failure.
+ (void)std::initializer_list<bool>{(res = res && (!pargs[I] || NPyBind::FromPyObject(pargs[I], outArgs)))...};
+ return res;
+ }
+
+ // Convenience overload that generates the index sequence for the caller.
+ template <class... Args>
+ bool ExtractOptionalArgs(PyObject* args, PyObject* kwargs, const char* keywords[], Args&... outArgs) {
+ return ExtractOptionalArgs(std::index_sequence_for<Args...>(), args, kwargs, keywords, outArgs...);
+ }
+
+ // Builds a std::tuple of C++ values from the Python tuple `args`: element I
+ // is converted to Args[I] with cv-qualifiers and references stripped. Uses
+ // the value-returning FromPyObject, which throws when a conversion fails.
+ template <typename... Args, std::size_t... I>
+ static auto GetArguments(std::index_sequence<I...>, PyObject* args) {
+ Y_UNUSED(args); // gcc bug
+ return std::make_tuple(FromPyObject<std::remove_cv_t<std::remove_reference_t<Args>>>(PyTuple_GetItem(args, I))...);
+ }
+
+ // Convenience overload that generates the index sequence for the caller.
+ template <typename... Args>
+ static auto GetArguments(PyObject* args) {
+ return GetArguments<Args...>(std::index_sequence_for<Args...>(), args);
+ }
+
+ // Creates a Python string object from `s`: a unicode object when compiled
+ // for Python 3, a bytes object otherwise.
+ inline PyObject* ReturnString(TStringBuf s) {
+#if PY_MAJOR_VERSION >= 3
+ return PyUnicode_FromStringAndSize(s.data(), s.size());
+#else
+ return PyBytes_FromStringAndSize(s.data(), s.size());
+#endif
+ }
+
+ // Creates a Python bytes object from `s`, wrapped in a smart pointer.
+ inline TPyObjectPtr ReturnBytes(TStringBuf s) {
+ return TPyObjectPtr(PyBytes_FromStringAndSize(s.data(), s.size()), true);
+ }
+
+ // Same object as ReturnString(s), wrapped in a smart pointer.
+ inline TPyObjectPtr NameFromString(TStringBuf s) {
+ return TPyObjectPtr(ReturnString(s), true);
+ }
+}
diff --git a/library/cpp/pybind/embedding.cpp b/library/cpp/pybind/embedding.cpp
new file mode 100644
index 0000000000..cf8941a92a
--- /dev/null
+++ b/library/cpp/pybind/embedding.cpp
@@ -0,0 +1,63 @@
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#include "embedding.h"
+
+#include <util/generic/ptr.h>
+#include <util/generic/yexception.h>
+
+namespace NPyBind {
+#if PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 8
+ // Deleter policy for THolder: releases the pointer with PyMem_RawFree.
+ class TDeleteRawMem {
+ public:
+ template <typename T>
+ static inline void Destroy(T* t) noexcept {
+ PyMem_RawFree(t);
+ }
+ };
+
+ // THolder whose payload is released with PyMem_RawFree.
+ template <typename T>
+ using TRawMemHolder = THolder<T, TDeleteRawMem>;
+
+ // Decodes the locale-encoded program name and hands it to Py_SetProgramName().
+ // Py_SetProgramName() is documented to require a string that stays valid and unchanged
+ // for the rest of the program, so ownership of the decoded buffer is deliberately given
+ // up here (a one-time, intentional leak) instead of freeing it on return.
+ // Throws via Y_ENSURE when the name cannot be decoded.
+ static void SetProgramName(char* name) {
+     TRawMemHolder<wchar_t> wideName(Py_DecodeLocale(name, nullptr));
+     Y_ENSURE(wideName);
+     Py_SetProgramName(wideName.Release());
+ }
+#endif
+
+ // Initializes the embedded Python interpreter, using argv0 as the program name.
+ // The initialization path is chosen at compile time from the Python version.
+ TEmbedding::TEmbedding(char* argv0) {
+#if PY_MAJOR_VERSION < 3
+ // Python 2: the narrow string is passed directly.
+ Py_SetProgramName(argv0);
+ Py_Initialize();
+#elif PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 8
+ // Python 3.8+: configure through PyConfig.
+ PyStatus status;
+
+ PyConfig config;
+ PyConfig_InitPythonConfig(&config);
+ // Disable parsing command line arguments
+ config.parse_argv = 0;
+
+ status = PyConfig_SetBytesString(&config, &config.program_name, argv0);
+ if (PyStatus_Exception(status)) {
+ // Release the config before leaving through Py_ExitStatusException.
+ PyConfig_Clear(&config);
+ Py_ExitStatusException(status);
+ }
+
+ status = Py_InitializeFromConfig(&config);
+ if (PyStatus_Exception(status)) {
+ PyConfig_Clear(&config);
+ Py_ExitStatusException(status);
+ }
+
+ PyConfig_Clear(&config);
+#elif PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 8
+ // Python 3.0 - 3.7: decode the name to a wide string first.
+ SetProgramName(argv0);
+ Py_Initialize();
+#endif
+ }
+
+ // Shuts the embedded interpreter down.
+ TEmbedding::~TEmbedding() {
+ Py_Finalize();
+ }
+}
diff --git a/library/cpp/pybind/embedding.h b/library/cpp/pybind/embedding.h
new file mode 100644
index 0000000000..18553d9f6c
--- /dev/null
+++ b/library/cpp/pybind/embedding.h
@@ -0,0 +1,10 @@
+#pragma once
+
+namespace NPyBind {
+ // Scoped owner of an embedded Python interpreter: the constructor initializes the
+ // interpreter with argv0 as the program name, the destructor finalizes it.
+ class TEmbedding {
+ public:
+ TEmbedding(char* argv0);
+ ~TEmbedding();
+ };
+
+}
diff --git a/library/cpp/pybind/empty.cpp b/library/cpp/pybind/empty.cpp
new file mode 100644
index 0000000000..10da997ecc
--- /dev/null
+++ b/library/cpp/pybind/empty.cpp
@@ -0,0 +1,2 @@
+#include "init.h"
+#include "v2.h"
diff --git a/library/cpp/pybind/exceptions.cpp b/library/cpp/pybind/exceptions.cpp
new file mode 100644
index 0000000000..db1531fc63
--- /dev/null
+++ b/library/cpp/pybind/exceptions.cpp
@@ -0,0 +1,147 @@
+#include "exceptions.h"
+#include "cast.h"
+#include "module.h"
+#include <util/generic/algorithm.h>
+
+namespace NPyBind {
+
+ namespace NPrivate {
+ // Creates the internal "pybind" module through TExceptionsHolder and returns it
+ // wrapped in a TPyObjectPtr (empty when module creation failed).
+ TPyObjectPtr CreatePyBindModule() {
+ return TPyObjectPtr(TExceptionsHolder::DoInitPyBindModule(), true);
+ }
+ }//NPrivate
+
+ // Returns the registered exception class called `name`.
+ // An empty name yields an empty pointer (meaning "no base class").
+ // Throws yexception when no exception with that name has been registered.
+ // The lookup uses find() so that an unknown name does not leave an empty entry
+ // behind in the Exceptions map.
+ TPyObjectPtr TExceptionsHolder::GetException(const TString& name) {
+     if (name.empty())
+         return TPyObjectPtr(nullptr);
+     const auto found = Exceptions.find(name);
+     if (found == Exceptions.end() || !found->second.Get())
+         ythrow yexception() << "Wrong base class '" << name << "'";
+     return found->second;
+ }
+
+ // Builds a Python tuple with the registered exception classes named in `names`
+ // (adjacent duplicate names are collapsed), for use as a list of base classes.
+ // Throws yexception when a name has not been registered.
+ TPyObjectPtr TExceptionsHolder::GetExceptions(const TVector<TString>& names) {
+     TVector<TString> tmp(names.begin(), names.end());
+     // std::unique only removes consecutive duplicates; the input order is preserved.
+     const auto uniqueEnd = std::unique(tmp.begin(), tmp.end());
+     const size_t count = static_cast<size_t>(std::distance(tmp.begin(), uniqueEnd));
+     TPyObjectPtr tuple(PyTuple_New(count), true);
+     for (size_t i = 0; i < count; ++i) {
+         // find() instead of operator[]: an unknown name must not insert an empty entry.
+         const auto found = Exceptions.find(tmp[i]);
+         if (found == Exceptions.end() || !found->second.Get())
+             ythrow yexception() << "Wrong base class '" << tmp[i] << "'";
+         // PyTuple_SetItem steals a reference, so it must be given one of its own;
+         // passing the borrowed pointer would make the tuple release a reference
+         // that is still owned by the Exceptions map.
+         PyTuple_SetItem(tuple.Get(), i, found->second.RefGet());
+     }
+     return tuple;
+ }
+
+ // def PyBindObjectReconstructor(cl, props):
+ //     return cl(__properties__=props)
+ //
+ // This function is registered directly in PyBindMethods, i.e. it is called by the
+ // interpreter without any C++ wrapper. Failures are therefore reported the Python way
+ // (error indicator set, nullptr returned) rather than by throwing a C++ exception
+ // through the interpreter's C frames.
+ static PyObject* PyBindObjectReconstructor(PyObject*, PyObject* args) {
+     TPyObjectPtr callable, props;
+     if (!ExtractArgs(args, callable, props)) {
+         if (!PyErr_Occurred())
+             PyErr_SetString(PyExc_TypeError, "Wrong method arguments");
+         return nullptr;
+     }
+     // PyObject_Call expects the positional arguments as a tuple.
+     TPyObjectPtr noArgs(PyTuple_New(0), true);
+     TPyObjectPtr kw(PyDict_New(), true);
+     if (!noArgs.Get() || !kw.Get())
+         return nullptr;
+     if (PyDict_SetItemString(kw.Get(), "__properties__", props.Get()) < 0)
+         return nullptr;
+     // The call result is already a new reference (or nullptr with the error set).
+     return PyObject_Call(callable.Get(), noArgs.Get(), kw.Get());
+ }
+
+ // Method table of the internal "pybind" module; terminated by an all-null sentinel entry.
+ static PyMethodDef PyBindMethods[] = {
+ {"PyBindObjectReconstructor", PyBindObjectReconstructor, METH_VARARGS, "Tech method. It's required for unpickling."},
+ {nullptr, nullptr, 0, nullptr}};
+
+// InitPyBind() creates the "pybind" module object with the PyBindMethods table.
+// Python 3 goes through a static PyModuleDef and PyModule_Create; Python 2 uses Py_InitModule.
+#if PY_MAJOR_VERSION >= 3
+ // Module definition: name "pybind", no docstring, per-module state size -1,
+ // remaining slots unused.
+ static struct PyModuleDef moduledef = {
+ PyModuleDef_HEAD_INIT,
+ "pybind",
+ NULL,
+ -1,
+ PyBindMethods,
+ NULL, NULL, NULL, NULL
+ };
+
+ static PyObject* InitPyBind() {
+ return PyModule_Create(&moduledef);
+ }
+#else
+ static PyObject* InitPyBind() {
+ return Py_InitModule("pybind", PyBindMethods);
+ }
+#endif
+
+ // void-returning variant of DoInitPyBindModule (the result is discarded);
+ // this is the function registered with PyImport_AppendInittab on Python 2.
+ void TExceptionsHolder::DoInitPyBindModule2() {
+ DoInitPyBindModule();
+ }
+
+ // Creates the "pybind" module, stores it in the singleton and publishes every named
+ // exception class as a module attribute.
+ // Returns the module, or nullptr when the module could not be created.
+ // Throws yexception when an exception class cannot be added to the module.
+ PyObject* TExceptionsHolder::DoInitPyBindModule() {
+     TExceptionsHolder& holder = Instance();
+     holder.Module = NPyBind::TPyObjectPtr(InitPyBind(), true);
+     if (!holder.Module.Get())
+         return nullptr;
+
+     for (const TCheckerPtr& checker : holder.Checkers) {
+         const TString name = checker->GetName();
+         // Checkers without a name have nothing to publish.
+         if (name.empty())
+             continue;
+         // The reference count has to be incremented before the object is passed to AddObject.
+         const int status = PyModule_AddObject(holder.Module.Get(), name.data(), checker->GetException().RefGet());
+         if (status < 0) {
+             ythrow yexception() << "Failed to add object " << name << " to internal module pybind";
+         }
+     }
+     return holder.Module.RefGet();
+ }
+
+ // Releases everything the holder owns: the checkers, the name-to-exception map and
+ // the module. Exception classes created by this library are removed from garbage
+ // collector tracking first.
+ void TExceptionsHolder::Clear() {
+ //Unfortunately in Python3 we can't retrack this object because of PyError_NewException
+ //it's only the safe way to preserve GC gens in valid state during the finalization
+ for (auto& ptr: Checkers) {
+ if (!dynamic_cast<const TPyErrExceptionsChecker*>(ptr.Get())) { // no need to untrack standard PyExc_* exceptions from TPyErrExceptionsChecker
+ if (auto exceptionPtr = ptr->GetException()) {
+ PyObject_GC_UnTrack(exceptionPtr.Get());
+ }
+ }
+ }
+ Checkers.clear();
+ Exceptions.clear();
+ Module.Drop();
+ }
+
+ // Registers the built-in exception hierarchy, adds the checker for TPyErr and
+ // registers the "pybind" module in Python's table of built-in modules.
+ TExceptionsHolder::TExceptionsHolder() {
+ // Base classes must be registered before the classes that derive from them,
+ // because base names are resolved at registration time.
+ AddException<std::exception>("yexception");
+ AddException<TSystemError>("TSystemError", "yexception");
+ AddException<TIoException>("TIoException", "yexception");
+
+ // TIoSystemError has two Python base classes.
+ TVector<TString> names(2);
+ names[0] = "TSystemError";
+ names[1] = "TIoException";
+
+ AddException<TIoSystemError>("TIoSystemError", names);
+ AddException<TFileError>("TFileError", "TIoSystemError");
+ AddException<TBadCastException>("TBadCastException", "yexception");
+
+ // Added last: ToPyException scans the checkers in reverse, so TPyErr is tested first.
+ Checkers.push_back(new TPyErrExceptionsChecker);
+
+ // XXX: In Python 2.6, PyImport_AppendInittab() function takes non-const char*, this causes
+ // "ISO C++11 does not allow conversion from string literal to 'char *'" warning.
+ static char pybind[] = "pybind";
+#if PY_MAJOR_VERSION >= 3
+ PyImport_AppendInittab(pybind, DoInitPyBindModule);
+
+ // Have the holder cleared when the finalization callbacks are run.
+ NPrivate::AddFinalizationCallBack([this]() {
+ Clear();
+ });
+#else
+ PyImport_AppendInittab(pybind, DoInitPyBindModule2);
+#endif
+ }
+
+ // Maps a C++ exception to the Python exception class of the most recently registered
+ // checker that accepts it. Returns an empty pointer when no checker matches.
+ NPyBind::TPyObjectPtr TExceptionsHolder::ToPyException(const std::exception& ex) {
+     // Newest checker first, so derived classes win over their bases.
+     const auto match = std::find_if(Checkers.rbegin(), Checkers.rend(), [&ex](const auto& checker) {
+         return checker->Check(ex);
+     });
+     if (match == Checkers.rend()) {
+         return TPyObjectPtr(nullptr);
+     }
+     return (*match)->GetException();
+ }
+}
diff --git a/library/cpp/pybind/exceptions.h b/library/cpp/pybind/exceptions.h
new file mode 100644
index 0000000000..48e20995e4
--- /dev/null
+++ b/library/cpp/pybind/exceptions.h
@@ -0,0 +1,143 @@
+#pragma once
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <util/generic/yexception.h>
+#include <util/generic/map.h>
+#include <util/generic/vector.h>
+#include "ptr.h"
+
+namespace NPyBind {
+ // C++ exception that carries the Python exception class to raise.
+ // Defaults to PyExc_RuntimeError when no class is given.
+ //
+ // Usage:
+ // ythrow TPyErr(PyExc_TypeError) << "some python type error somewhere in your C++ code";
+ //
+ class TPyErr: public virtual yexception {
+ public:
+ TPyErr(PyObject* theException = PyExc_RuntimeError)
+ : Exception(theException)
+ {
+ }
+
+ // Returns the Python exception class given at construction.
+ TPyObjectPtr GetException() const {
+ return Exception;
+ }
+
+ private:
+ NPyBind::TPyObjectPtr Exception;
+ };
+
+ //Private api for creating PyBind python module
+ //Needs only for overriding pybind python module in library which imports other pybind library
+ namespace NPrivate {
+ TPyObjectPtr CreatePyBindModule();
+ }//NPrivate
+ // Singleton registry that maps C++ exception types to Python exception classes of
+ // the "pybind" module, and owns that module.
+ class TExceptionsHolder {
+ friend TPyObjectPtr NPrivate::CreatePyBindModule();
+ private:
+ // Copying is disabled (declared, never defined); construction goes through Instance().
+ TExceptionsHolder(const TExceptionsHolder&);
+ TExceptionsHolder& operator=(const TExceptionsHolder&);
+ TExceptionsHolder();
+
+ void Clear();
+ TPyObjectPtr GetException(const TString&);
+ TPyObjectPtr GetExceptions(const TVector<TString>&);
+ private:
+ // Interface of one C++-to-Python exception mapping.
+ class TExceptionsChecker {
+ public:
+ virtual ~TExceptionsChecker() {
+ }
+ // True when ex is handled by this mapping.
+ virtual bool Check(const std::exception& ex) const = 0;
+ // Name of the Python class inside the module; empty when nothing is published.
+ virtual TString GetName() const = 0;
+ // Python exception class to raise.
+ virtual TPyObjectPtr GetException() const = 0;
+ };
+
+ // Mapping for one statically known C++ type: matches when ex is a TExcType.
+ template <typename TExcType>
+ class TConcreteExceptionsChecker: public TExceptionsChecker {
+ private:
+ TString Name;
+ TPyObjectPtr Exception;
+
+ public:
+ TConcreteExceptionsChecker(const TString& name, TPyObjectPtr exception)
+ : Name(name)
+ , Exception(exception)
+ {
+ }
+
+ bool Check(const std::exception& ex) const override {
+ const std::exception* e = &ex;
+ return dynamic_cast<const TExcType*>(e);
+ }
+
+ TString GetName() const override {
+ return Name;
+ }
+
+ TPyObjectPtr GetException() const override {
+ return Exception;
+ }
+ };
+
+ // Mapping for TPyErr: the Python class is taken from the exception object itself.
+ class TPyErrExceptionsChecker: public TExceptionsChecker {
+ private:
+ // Set by Check() and read by the following GetException(); hence mutable.
+ mutable TPyObjectPtr Exception;
+
+ public:
+ TPyErrExceptionsChecker() {
+ }
+
+ bool Check(const std::exception& ex) const override {
+ const TPyErr* err = dynamic_cast<const TPyErr*>(&ex);
+ if (err) {
+ Exception = err->GetException();
+ }
+ return err != nullptr;
+ }
+
+ // No name: this checker does not publish a class in the module.
+ TString GetName() const override {
+ return TString();
+ }
+
+ TPyObjectPtr GetException() const override {
+ return Exception;
+ }
+ };
+
+ typedef TSimpleSharedPtr<TExceptionsChecker> TCheckerPtr;
+ typedef TVector<TCheckerPtr> TCheckersVector;
+ typedef TMap<TString, TPyObjectPtr> TExceptionsMap;
+
+ TPyObjectPtr Module;
+ TCheckersVector Checkers;
+ TExceptionsMap Exceptions;
+
+ static PyObject* DoInitPyBindModule();
+ static void DoInitPyBindModule2();
+
+ public:
+ // Returns the process-wide instance, created on first use.
+ static TExceptionsHolder& Instance() {
+ static TExceptionsHolder Holder;
+ return Holder;
+ }
+
+ // Creates the Python exception class "pybind.<name>", optionally derived from the
+ // already registered class `base`, and maps TExcType to it.
+ // NOTE(review): the result of PyErr_NewException is stored without a null check.
+ template <typename TExcType>
+ void AddException(const TString& name, const TString& base = "") {
+ TPyObjectPtr baseException(GetException(base));
+ TString fullName = TString("pybind.") + name;
+ TPyObjectPtr exception(PyErr_NewException(const_cast<char*>(fullName.c_str()), baseException.Get(), nullptr), true);
+ Checkers.push_back(new TConcreteExceptionsChecker<TExcType>(name, exception));
+ Exceptions[name] = exception;
+ }
+
+ // Same as above, but the new class derives from several registered classes.
+ template <typename TExcType>
+ void AddException(const TString& name, const TVector<TString>& bases) {
+ TPyObjectPtr baseExceptions(GetExceptions(bases));
+ TString fullName = TString("pybind.") + name;
+ TPyObjectPtr exception(PyErr_NewException(const_cast<char*>(fullName.c_str()), baseExceptions.Get(), nullptr), true);
+ Checkers.push_back(new TConcreteExceptionsChecker<TExcType>(name, exception));
+ Exceptions[name] = exception;
+ }
+
+ // Returns the Python exception class for ex, or an empty pointer when none matches.
+ NPyBind::TPyObjectPtr ToPyException(const std::exception&);
+ };
+}
diff --git a/library/cpp/pybind/init.h b/library/cpp/pybind/init.h
new file mode 100644
index 0000000000..58874574ed
--- /dev/null
+++ b/library/cpp/pybind/init.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#include "ptr.h"
+
+namespace NPyBind {
+// PYBIND_MODINIT(name) declares the module entry point under the name the running
+// Python major version expects; ModInitReturn produces that entry point's return value.
+#if PY_MAJOR_VERSION >= 3
+
+#define PYBIND_MODINIT(name) PyMODINIT_FUNC PyInit_##name()
+
+ // Python 3: the entry point returns the module; ownership leaves the holder.
+ inline PyObject* ModInitReturn(TPyObjectPtr&& modptr) {
+ return modptr.Release();
+ }
+
+#else
+
+#define PYBIND_MODINIT(name) PyMODINIT_FUNC init##name()
+
+ // Python 2: the entry point returns nothing, so the module pointer is ignored.
+ inline void ModInitReturn(TPyObjectPtr&&) {
+ }
+
+#endif
+}
diff --git a/library/cpp/pybind/method.h b/library/cpp/pybind/method.h
new file mode 100644
index 0000000000..7c1f6e90e1
--- /dev/null
+++ b/library/cpp/pybind/method.h
@@ -0,0 +1,439 @@
+#pragma once
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <util/generic/string.h>
+#include <util/generic/map.h>
+#include <util/generic/set.h>
+#include <util/generic/vector.h>
+#include <util/generic/ptr.h>
+#include <util/generic/typetraits.h>
+
+#include <util/generic/function.h>
+
+#include "cast.h"
+
+namespace NPyBind {
+ // Interface of one candidate implementation of a Python-visible method on TObjType.
+ template <typename TObjType>
+ class TBaseMethodCaller {
+ public:
+ virtual ~TBaseMethodCaller() {
+ }
+ // Tries to perform the call; on success stores the result in res and returns true.
+ // TMethodCallers treats false as "try the next candidate".
+ virtual bool CallMethod(PyObject* owner, TObjType* self, PyObject* args, PyObject* kwargs, PyObject*& res) const = 0;
+ // Tells whether this caller is applicable to self; applicable by default.
+ virtual bool HasMethod(PyObject*, TObjType*, const TString&, const TSet<TString>&) {
+ return true;
+ }
+ };
+
+ template <typename TObjType>
+ class TIsACaller;
+
+ // Registry of method callers for TObjType: every method name maps to an ordered list
+ // of candidate callers (overloads).
+ template <typename TObjType>
+ class TMethodCallers {
+ private:
+ typedef TSimpleSharedPtr<TBaseMethodCaller<TObjType>> TCallerPtr;
+ typedef TVector<TCallerPtr> TCallerList;
+ typedef TMap<TString, TCallerList> TCallerMap;
+
+ // Stored by reference: the set passed to the constructor must outlive this object.
+ const TSet<TString>& HiddenAttrNames;
+ TCallerMap Callers;
+
+ public:
+ TMethodCallers(const TSet<TString>& hiddenNames)
+ : HiddenAttrNames(hiddenNames)
+ {
+ }
+
+ // Appends a candidate for `name`; candidates are tried in registration order.
+ void AddCaller(const TString& name, TCallerPtr caller) {
+ Callers[name].push_back(caller);
+ }
+
+ bool HasCaller(const TString& name) const {
+ return Callers.has(name);
+ }
+
+ // Tries every candidate registered for `name` and returns the result of the first
+ // one that reports success. Returns nullptr when the name is unknown or when no
+ // candidate succeeds.
+ PyObject* CallMethod(PyObject* owner, TObjType* self, PyObject* args, PyObject* kwargs, const TString& name) const {
+ const TCallerList* lst = Callers.FindPtr(name);
+ if (!lst)
+ return nullptr;
+ for (const auto& caller : *lst) {
+ PyObject* res = nullptr;
+ // Drop any error left behind by the previous candidate.
+ PyErr_Clear();
+ if (caller->CallMethod(owner, self, args, kwargs, res))
+ return res;
+ }
+ return nullptr;
+ }
+
+ // True when at least one candidate registered for `name` is applicable to self.
+ bool HasMethod(PyObject* owner, TObjType* self, const TString& name) const {
+ const TCallerList* lst = Callers.FindPtr(name);
+ if (!lst)
+ return false;
+ for (const auto& caller : *lst) {
+ if (caller->HasMethod(owner, self, name, HiddenAttrNames))
+ return true;
+ }
+ return false;
+ }
+
+ // Appends the names of all applicable, non-hidden methods to resultNames.
+ void GetMethodsNames(PyObject* owner, TObjType* self, TVector<TString>& resultNames) const {
+ for (const auto& it : Callers) {
+ if (HasMethod(owner, self, it.first) && !HiddenAttrNames.contains(it.first))
+ resultNames.push_back(it.first);
+ }
+ }
+
+ // Appends every registered method name to resultNames, without any filtering.
+ void GetAllMethodsNames(TVector<TString>& resultNames) const {
+ for (const auto& it : Callers) {
+ resultNames.push_back(it.first);
+ }
+ }
+
+ // Replaces resultNames with the names reported by the first TIsACaller registered
+ // under "IsA"; leaves resultNames untouched when there is none.
+ void GetPropertiesNames(PyObject*, TObjType* self, TVector<TString>& resultNames) const {
+ const TCallerList* lst = Callers.FindPtr("IsA");
+ if (!lst)
+ return;
+ for (const auto& caller : *lst) {
+ TIsACaller<TObjType>* isACaller = dynamic_cast<TIsACaller<TObjType>*>(caller.Get());
+ if (isACaller) {
+ resultNames = isACaller->GetPropertiesNames(self);
+ return;
+ }
+ }
+ }
+ };
+
+ // Caller that answers "is this object an instance of the named C++ classes?".
+ // Derived classes register the known class names with AddChecker.
+ template <typename TObjType>
+ class TIsACaller: public TBaseMethodCaller<TObjType> {
+ private:
+ // Type test for one registered class name.
+ class TIsAChecker {
+ public:
+ virtual ~TIsAChecker() {
+ }
+ virtual bool Check(const TObjType* obj) const = 0;
+ };
+
+ // Type test implemented with dynamic_cast to TConcrete.
+ template <typename TConcrete>
+ class TIsAConcreteChecker: public TIsAChecker {
+ public:
+ bool Check(const TObjType* obj) const override {
+ return dynamic_cast<const TConcrete*>(obj) != nullptr;
+ }
+ };
+
+ typedef TSimpleSharedPtr<TIsAChecker> TCheckerPtr;
+ typedef TMap<TString, TCheckerPtr> TCheckersMap;
+
+ TCheckersMap Checkers;
+
+ // Runs the test registered under `name`; sets KeyError and returns false for an
+ // unknown name.
+ bool Check(const TString& name, const TObjType* obj) const {
+ const TCheckerPtr* checker = Checkers.FindPtr(name);
+ if (!checker) {
+ PyErr_Format(PyExc_KeyError, "unknown class name: %s", name.data());
+ return false;
+ }
+ return (*checker)->Check(obj);
+ }
+
+ protected:
+ TIsACaller() {
+ }
+
+ // Registers TConcrete under `name`, replacing any earlier registration of that name.
+ template <typename TConcrete>
+ void AddChecker(const TString& name) {
+ Checkers[name] = new TIsAConcreteChecker<TConcrete>;
+ }
+
+ public:
+ // Expects a tuple of class names; stores a Python bool in res that is true only
+ // when self passes the test for every name. Returns false when args is not a
+ // tuple or a Python error is pending after the checks.
+ bool CallMethod(PyObject*, TObjType* self, PyObject* args, PyObject*, PyObject*& res) const override {
+ if (args == nullptr || !PyTuple_Check(args))
+ return false;
+ size_t cnt = PyTuple_Size(args);
+ bool result = true;
+ for (size_t i = 0; i < cnt; ++i) {
+ // Short-circuit: after the first failed test the remaining names are not examined.
+ // NOTE(review): the string conversion result is passed on without a null check —
+ // confirm how a non-string argument is handled.
+ result = result && Check(
+#if PY_MAJOR_VERSION >= 3
+ PyUnicode_AsUTF8(
+#else
+ PyString_AsString(
+#endif
+ PyTuple_GetItem(args, i)), self);
+ }
+ if (PyErr_Occurred()) {
+ return false;
+ }
+ res = BuildPyObject(result);
+ return true;
+ }
+
+ // Returns the registered names whose test obj passes.
+ TVector<TString> GetPropertiesNames(const TObjType* obj) const {
+ TVector<TString> names;
+
+ for (const auto& it : Checkers) {
+ if (it.second->Check(obj)) {
+ names.push_back(it.first);
+ }
+ }
+
+ return names;
+ }
+ };
+
+ // Caller that forwards to the attribute found by Python's generic attribute lookup
+ // on the wrapper object, bypassing the custom attribute machinery.
+ template <typename TObjType>
+ class TGenericMethodCaller: public TBaseMethodCaller<TObjType> {
+ private:
+     TString AttrName;
+
+ public:
+     TGenericMethodCaller(const TString& attrName)
+         : AttrName(attrName)
+     {
+     }
+
+     // Looks up AttrName on obj and calls it with args (keyword arguments are ignored).
+     // Stores the call result in res and returns whether the call succeeded.
+     // Throws yexception when the attribute cannot be found.
+     bool CallMethod(PyObject* obj, TObjType*, PyObject* args, PyObject*, PyObject*& res) const override {
+         auto str = NameFromString(AttrName);
+         // PyObject_GenericGetAttr returns a new reference; keeping it in a holder
+         // releases it on every exit path instead of leaking it.
+         TPyObjectPtr attr(PyObject_GenericGetAttr(obj, str.Get()), true);
+         if (!attr.Get())
+             ythrow yexception() << "Can't get generic attribute '" << AttrName << "'";
+         res = PyObject_CallObject(attr.Get(), args);
+         return res != nullptr;
+     }
+ };
+
+
+ // Base for callers that only apply to objects of dynamic type TSubObject:
+ // HasMethod reports whether self can be cast to TSubObject. CallMethod is left to
+ // derived classes.
+ template <typename TObjType, typename TSubObject>
+ class TSubObjectChecker: public TBaseMethodCaller<TObjType> {
+ public:
+ ~TSubObjectChecker() override {
+ }
+
+ bool HasMethod(PyObject*, TObjType* self, const TString&, const TSet<TString>&) override {
+ return dynamic_cast<const TSubObject*>(self) != nullptr;
+ }
+ };
+
+ // Calls functor with the elements of resultArgs as arguments and stores the call's
+ // return value, converted to a Python object, in res. Selected when ResType is not void.
+ template <typename TFunctor, typename Tuple, typename ResType, typename=std::enable_if_t<!std::is_same_v<ResType, void>>>
+ void ApplyFunctor(TFunctor functor, Tuple resultArgs, PyObject*& res) {
+ res = BuildPyObject(std::move(Apply(functor, resultArgs)));
+ }
+
+ // Overload for ResType == void: res is set to None, then the functor is called.
+ // The extra defaulted template parameter keeps the two overloads distinct.
+ template <typename TFunctor, typename Tuple, typename ResType, typename=std::enable_if_t<std::is_same_v<ResType, void>>, typename=void>
+ void ApplyFunctor(TFunctor functor, Tuple resultArgs, PyObject*& res) {
+ Py_INCREF(Py_None);
+ res = Py_None;
+ Apply(functor, resultArgs);
+ }
+
+ // Caller backed by a std::function taking the object by reference followed by the
+ // converted Python arguments.
+ template <typename TObjType, typename TResType, typename... Args>
+ class TFunctorCaller: public TBaseMethodCaller<TObjType> {
+     using TFunctor = std::function<TResType(TObjType&,Args...)>;
+     TFunctor Functor;
+ public:
+     // The functor is taken by value and moved into place to avoid a second copy.
+     explicit TFunctorCaller(TFunctor functor)
+         : Functor(std::move(functor))
+     {
+     }
+
+     // Converts args to Args..., calls the functor with *self in front and stores the
+     // converted result in res (None for a void functor). Always returns true.
+     // Marked override, like the other callers, so that a signature mismatch with the
+     // base class is a compile error.
+     bool CallMethod(PyObject*, TObjType* self, PyObject* args, PyObject*, PyObject*& res) const override {
+         auto methodArgsTuple = GetArguments<Args...>(args);
+         auto resultArgs = std::tuple_cat(std::tie(*self), methodArgsTuple);
+         ApplyFunctor<TFunctor, decltype(resultArgs), TResType>(Functor, resultArgs, res);
+         return true;
+     }
+ };
+
+ // Base for state-getter callers (see the unpickling note on PyBindObjectReconstructor):
+ // the call takes no arguments and returns a dictionary of attributes, whose content is
+ // supplied by the derived class through GetAttrsDictionary.
+ template <typename TObjType, typename TRealType>
+ class TGetStateCaller: public TSubObjectChecker<TObjType, TRealType> {
+ protected:
+ // Calls obj.<methodName>() without arguments; returns None when the call fails
+ // (the Python error is cleared).
+ TPyObjectPtr AddFromCaller(PyObject* obj, const TString& methodName) const {
+ PyObject* res = PyObject_CallMethod(obj, const_cast<char*>(methodName.c_str()), const_cast<char*>(""));
+ if (!res) {
+ PyErr_Clear();
+ return TPyObjectPtr(Py_None);
+ }
+ return TPyObjectPtr(res, true);
+ }
+
+ // Merges the entries of obj.__dict__ into dict; keys already present in dict are kept.
+ // Throws yexception when __dict__ cannot be converted.
+ // NOTE(review): a failed __dict__ lookup is passed to FromPyObject as null — confirm
+ // that FromPyObject tolerates it.
+ void GetStandartAttrsDictionary(PyObject* obj, TRealType*, TMap<TString, TPyObjectPtr>& dict) const {
+ TPyObjectPtr attrsDict(PyObject_GetAttrString(obj, "__dict__"), true);
+ TMap<TString, TPyObjectPtr> attrs;
+ if (!FromPyObject(attrsDict.Get(), attrs))
+ ythrow yexception() << "Can't get '__dict__' attribute";
+ dict.insert(attrs.begin(), attrs.end());
+ }
+
+ // Fills dict with the state of self; implemented by the derived class.
+ virtual void GetAttrsDictionary(PyObject* obj, TRealType* self, TMap<TString, TPyObjectPtr>& dict) const = 0;
+
+ public:
+ // Requires an empty argument tuple (throws yexception otherwise). Returns false when
+ // self is not a TRealType; otherwise stores the state dictionary in res.
+ bool CallMethod(PyObject* obj, TObjType* self, PyObject* args, PyObject*, PyObject*& res) const override {
+ if (!ExtractArgs(args))
+ ythrow yexception() << "Can't parse arguments: it should be none";
+ TRealType* rself = dynamic_cast<TRealType*>(self);
+ if (!rself)
+ return false;
+ TMap<TString, TPyObjectPtr> dict;
+ GetAttrsDictionary(obj, rself, dict);
+ res = BuildPyObject(dict);
+ return true;
+ }
+ };
+
+ // Base for state-setter callers: the call takes one dictionary of attributes, which the
+ // derived class applies to the object through SetAttrsDictionary.
+ template <typename TObjType, typename TRealType>
+ class TSetStateCaller: public TSubObjectChecker<TObjType, TRealType> {
+ protected:
+ // Replaces obj.__dict__ with dict.
+ // NOTE(review): the result of PyObject_SetAttrString is not checked.
+ void SetStandartAttrsDictionary(PyObject* obj, TRealType*, TMap<TString, TPyObjectPtr>& dict) const {
+ TPyObjectPtr value(BuildPyObject(dict), true);
+ PyObject_SetAttrString(obj, "__dict__", value.Get());
+ }
+
+ // Applies dict to self; implemented by the derived class.
+ virtual void SetAttrsDictionary(PyObject* obj, TRealType* self, TMap<TString, TPyObjectPtr>& dict) const = 0;
+
+ public:
+ // Requires exactly one dictionary argument (throws yexception otherwise). Returns
+ // false when self is not a TRealType; otherwise applies the state and stores None in res.
+ bool CallMethod(PyObject* obj, TObjType* self, PyObject* args, PyObject*, PyObject*& res) const override {
+ TMap<TString, TPyObjectPtr> dict;
+ if (!ExtractArgs(args, dict))
+ ythrow yexception() << "Can't parse arguments: it should be one dictionary";
+ TRealType* rself = dynamic_cast<TRealType*>(self);
+ if (!rself)
+ return false;
+ SetAttrsDictionary(obj, rself, dict);
+ Py_INCREF(Py_None);
+ res = Py_None;
+ return true;
+ }
+ };
+
+ // Caller for a member function of TSubObject that returns a value.
+ // The Python arguments are converted to Args..., the member function is invoked on
+ // the object and its result is converted back to a Python object.
+ template <typename TObjType, typename TResult, typename TSubObject, typename TMethod, typename... Args>
+ class TAnyParameterMethodCaller: public TSubObjectChecker<TObjType, TSubObject> {
+ private:
+     TMethod Method;
+
+ public:
+     TAnyParameterMethodCaller(TMethod method)
+         : Method(method)
+     {
+     }
+
+ public:
+     // Returns false ("not applicable, try the next overload") when self is not a
+     // TSubObject, when the argument count differs from the method's arity, or when an
+     // argument cannot be converted. Returns true otherwise; after a C++ exception
+     // res stays null and a Python error is set.
+     bool CallMethod(PyObject*, TObjType* self, PyObject* args, PyObject*, PyObject*& res) const override {
+         TSubObject* sub = dynamic_cast<TSubObject*>(self);
+         if (sub == nullptr)
+             return false;
+         if (args && (!PyTuple_Check(args) || PyTuple_Size(args) != TFunctionArgs<TMethod>::Length)) {
+             //ythrow yexception() << "Method takes " << (size_t)(TFunctionArgs<TMethod>::Length) << " arguments, " << PyTuple_Size(args) << " provided";
+             return false;
+         }
+
+         try {
+             // Binds the object and the member pointer so that Apply can call them
+             // like a plain function.
+             class Applicant {
+             public:
+                 TResult operator()(Args... theArgs) {
+                     return (Sub->*Method)(theArgs...);
+                 }
+                 TSubObject* Sub;
+                 TMethod Method;
+             };
+             res = BuildPyObject(std::move(Apply(Applicant{sub, Method}, GetArguments<Args...>(args))));
+         } catch (const cast_exception&) {
+             // Caught by const reference: no copy and no slicing of derived exceptions.
+             return false;
+         } catch (...) {
+             if (PyExc_StopIteration == PyErr_Occurred()) {
+                 // NB: it's replacement for geo_boost::python::throw_error_already_set();
+                 return true;
+             }
+             PyErr_SetString(PyExc_RuntimeError, CurrentExceptionMessage().data());
+             return true;
+         }
+
+         return true;
+     }
+ };
+
+ // Specialization for member functions returning void: the Python result is None.
+ template <typename TObjType, typename TSubObject, typename TMethod, typename... Args>
+ class TAnyParameterMethodCaller<TObjType, void, TSubObject, TMethod, Args...>: public TSubObjectChecker<TObjType, TSubObject> {
+ private:
+     TMethod Method;
+
+ public:
+     TAnyParameterMethodCaller(TMethod method)
+         : Method(method)
+     {
+     }
+
+ public:
+     // Returns false ("not applicable, try the next overload") when self is not a
+     // TSubObject, when the argument count differs from the method's arity, or when an
+     // argument cannot be converted. Returns true otherwise; after a C++ exception
+     // res stays null and RuntimeError is set.
+     bool CallMethod(PyObject*, TObjType* self, PyObject* args, PyObject*, PyObject*& res) const override {
+         TSubObject* sub = dynamic_cast<TSubObject*>(self);
+         if (sub == nullptr) {
+             return false;
+         }
+         if (args && (!PyTuple_Check(args) || PyTuple_Size(args) != TFunctionArgs<TMethod>::Length)) {
+             return false;
+         }
+
+         try {
+             // Binds the object and the member pointer so that Apply can call them
+             // like a plain function.
+             class Applicant {
+             public:
+                 void operator()(Args... theArgs) {
+                     (Sub->*Method)(theArgs...);
+                 }
+                 TSubObject* Sub;
+                 TMethod Method;
+             };
+
+             Apply(Applicant{sub, Method}, GetArguments<Args...>(args));
+
+             Py_INCREF(Py_None);
+             res = Py_None;
+         } catch (const cast_exception&) {
+             // Caught by const reference: no copy and no slicing of derived exceptions.
+             return false;
+         } catch (...) {
+             PyErr_SetString(PyExc_RuntimeError, CurrentExceptionMessage().data());
+             return true;
+         }
+
+         return true;
+     }
+ };
+
+ // Names the type of a pointer to a const member function of TSubObject.
+ template <typename TResult, typename TSubObject, typename... Args>
+ struct TConstTraits {
+ typedef TResult (TSubObject::*TMethod)(Args... args) const;
+ };
+
+ // Names the type of a pointer to a non-const member function of TSubObject.
+ template <typename TResult, typename TSubObject, typename... Args>
+ struct TNonConstTraits {
+ typedef TResult (TSubObject::*TMethod)(Args... args);
+ };
+
+ // Caller for a const member function: a TAnyParameterMethodCaller working on
+ // `const TSubObject`. The TMethod template parameter is not used in the visible code;
+ // the method type comes from TConstTraits.
+ template <typename TObjType, typename TResult, typename TSubObject, typename TMethod, typename... Args>
+ class TConstMethodCaller: public TAnyParameterMethodCaller<TObjType, TResult, const TSubObject, typename TConstTraits<TResult, TSubObject, Args...>::TMethod, Args...> {
+ public:
+ TConstMethodCaller(typename TConstTraits<TResult, TSubObject, Args...>::TMethod method)
+ : TAnyParameterMethodCaller<TObjType, TResult, const TSubObject, typename TConstTraits<TResult, TSubObject, Args...>::TMethod, Args...>(method)
+ {
+ }
+ };
+
+ // Creates a shared caller for a const member function; TResult, TSubObject and Args
+ // are deduced from the member pointer, TObjType has to be given explicitly.
+ template <typename TObjType, typename TResult, typename TSubObject, typename... Args>
+ TSimpleSharedPtr<TBaseMethodCaller<TObjType>> CreateConstMethodCaller(TResult (TSubObject::*method)(Args...) const) {
+ return new TConstMethodCaller<TObjType, TResult, TSubObject, TResult (TSubObject::*)(Args...) const, Args...>(method);
+ }
+
+ // Creates a shared caller that invokes functor with the object followed by the
+ // converted Python arguments.
+ template <typename TObjType, typename TResType, typename... Args>
+ TSimpleSharedPtr<TBaseMethodCaller<TObjType>> CreateFunctorCaller(std::function<TResType(TObjType&, Args...)> functor) {
+ return new TFunctorCaller<TObjType, TResType, Args...>(functor);
+ }
+
+ // Caller for a non-const member function: a TAnyParameterMethodCaller working on
+ // TSubObject. The TMethod template parameter is not used in the visible code;
+ // the method type comes from TNonConstTraits.
+ template <typename TObjType, typename TResult, typename TSubObject, typename TMethod, typename... Args>
+ class TMethodCaller: public TAnyParameterMethodCaller<TObjType, TResult, TSubObject, typename TNonConstTraits<TResult, TSubObject, Args...>::TMethod, Args...> {
+ public:
+ TMethodCaller(typename TNonConstTraits<TResult, TSubObject, Args...>::TMethod method)
+ : TAnyParameterMethodCaller<TObjType, TResult, TSubObject, typename TNonConstTraits<TResult, TSubObject, Args...>::TMethod, Args...>(method)
+ {
+ }
+ };
+
+ // Creates a shared caller for a non-const member function; TResult, TSubObject and
+ // Args are deduced from the member pointer, TObjType has to be given explicitly.
+ template <typename TObjType, typename TResult, typename TSubObject, typename... Args>
+ TSimpleSharedPtr<TBaseMethodCaller<TObjType>> CreateMethodCaller(TResult (TSubObject::*method)(Args...)) {
+ return new TMethodCaller<TObjType, TResult, TSubObject, TResult (TSubObject::*)(Args...), Args...>(method);
+ }
+
+}
diff --git a/library/cpp/pybind/module.cpp b/library/cpp/pybind/module.cpp
new file mode 100644
index 0000000000..63b15de45d
--- /dev/null
+++ b/library/cpp/pybind/module.cpp
@@ -0,0 +1,72 @@
+#include "module.h"
+#include "ptr.h"
+
+#include <util/generic/adaptor.h>
+
+namespace NPyBind {
+
+#if PY_MAJOR_VERSION >= 3
+ namespace NPrivate {
+ // Owns the list of finalization callbacks as a function-local static, so the list
+ // exists from the first call to GetCallBacks().
+ struct TFinCallBacksHolder {
+ static TVector<TFinalizationCallBack>& GetCallBacks() {
+ static TVector<TFinalizationCallBack> res;
+ return res;
+ }
+ };
+
+ // Registers module.finalizer with Python's atexit module, i.e. performs
+ //     import atexit; atexit.register(module.finalizer)
+ // Aborts when any step fails.
+ TAtExitRegistrar::TAtExitRegistrar(TPyObjectPtr module) {
+     TPyObjectPtr atExitModuleName(Py_BuildValue("s", "atexit"), true);
+     // PyImport_Import returns a new reference: adopt it instead of taking an extra
+     // one, which would never be released.
+     TPyObjectPtr atExitModule(PyImport_Import(atExitModuleName.Get()), true);
+     Y_ABORT_UNLESS(atExitModule);
+     TPyObjectPtr finalizerFunc(PyObject_GetAttrString(module.Get(), "finalizer"), true);
+     Y_ABORT_UNLESS(finalizerFunc);
+     TPyObjectPtr registerName(Py_BuildValue("s", "register"), true);
+     // The call result is a new reference as well; holding it releases it, and a null
+     // result means the registration failed and a Python error is pending.
+     TPyObjectPtr registered(PyObject_CallMethodObjArgs(atExitModule.Get(), registerName.Get(), finalizerFunc.Get(), nullptr), true);
+     Y_ABORT_UNLESS(registered);
+ }
+
+ // Creates the "pybind" module, publishes it in sys.modules and schedules its removal
+ // for finalization time.
+ TPyBindModuleRegistrar::TPyBindModuleRegistrar() {
+     TPyObjectPtr modules(PySys_GetObject("modules"));
+     Y_ENSURE(modules.Get());
+     Module = NPrivate::CreatePyBindModule();
+     if (Module) {
+         Y_ABORT_UNLESS(0 == PyDict_SetItemString(modules.Get(), "pybind", Module.RefGet()));
+     }
+     AddFinalizationCallBack([this]() {
+         // Extra holder on the module for the duration of the callback.
+         auto ptr = Module;
+         Y_UNUSED(ptr);
+         TPyObjectPtr modules(PySys_GetObject("modules"));
+         Y_ENSURE(modules.Get());
+         // Py_BuildValue returns a new reference: adopt it so that the temporary name
+         // object is released when the callback returns.
+         TPyObjectPtr pyBindName(Py_BuildValue("s", "pybind"), true);
+         if (PyDict_Contains(modules.Get(), pyBindName.Get()) == 1) {
+             Y_ABORT_UNLESS(0==PyDict_DelItemString(modules.Get(), "pybind"));
+         }
+         if (Module) {
+             //We have to untrack the module because some of its references point to gc-leaked errors
+             //see exceptions.cpp for more info
+             PyObject_GC_UnTrack(Module.Get());
+             Module.Drop();
+         }
+     });
+ }
+
+ // Appends callback to the list that FinalizeAll() runs.
+ void AddFinalizationCallBack(TFinalizationCallBack callback) {
+ TFinCallBacksHolder::GetCallBacks().push_back(callback);
+ }
+
+ // Runs every registered finalization callback, last registered first.
+ // Always returns 0.
+ int FinalizeAll() {
+ for (auto callback: Reversed(NPrivate::TFinCallBacksHolder::GetCallBacks())) {
+ callback();
+ }
+ return 0;
+ }
+ }
+#endif
+
+
+ // Starts with one empty method list (for the first module to be defined) and, on
+ // Python 3, registers the "finalizer" module method that runs FinalizeAll().
+ TModuleHolder::TModuleHolder()
+ : Methods(1, new TVector<TMethodDef>)
+ {
+#if PY_MAJOR_VERSION >= 3
+ AddModuleMethod<TModuleMethodCaller<decltype(&NPrivate::FinalizeAll), &NPrivate::FinalizeAll>::Call>("finalizer");
+#endif
+ }
+}//NPyBind
diff --git a/library/cpp/pybind/module.h b/library/cpp/pybind/module.h
new file mode 100644
index 0000000000..41dcb4dfec
--- /dev/null
+++ b/library/cpp/pybind/module.h
@@ -0,0 +1,176 @@
+#pragma once
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "ptr.h"
+#include "cast.h"
+#include "exceptions.h"
+
+#include <util/generic/function.h>
+
+namespace NPyBind {
+#if PY_MAJOR_VERSION >= 3
+ namespace NPrivate {
+ using TFinalizationCallBack = std::function<void()>;
+ void AddFinalizationCallBack(TFinalizationCallBack);
+ // One-shot registrar: the constructor runs once, for the module passed to the first
+ // Instantiate() call; later calls do nothing.
+ class TAtExitRegistrar: private TNonCopyable {
+ TAtExitRegistrar(TPyObjectPtr module);
+ public:
+ static void Instantiate(TPyObjectPtr module) {
+ static TAtExitRegistrar registrar(module);
+ Y_UNUSED(registrar);
+ }
+ };
+
+ // One-shot registrar that owns the "pybind" module: the constructor runs on the first
+ // Instantiate() call; later calls do nothing.
+ class TPyBindModuleRegistrar: private TNonCopyable {
+ TPyBindModuleRegistrar();
+ TPyObjectPtr Module;
+ public:
+ static void Instantiate() {
+ static TPyBindModuleRegistrar registrar;
+ Y_UNUSED(registrar);
+ }
+ };
+ } //NPrivate
+#endif
+
+ class TModuleHolder {
+ private:
+ TModuleHolder(const TModuleHolder&);
+ TModuleHolder& operator=(const TModuleHolder&);
+
+ TModuleHolder();
+ private:
+ typedef PyCFunction TModuleMethod;
+#if PY_MAJOR_VERSION >= 3
+ typedef PyObject* (*TModuleInitFunc)();
+#else
+ typedef void (*TModuleInitFunc)();
+#endif
+
+ // Owning description of one module method; convertible to the PyMethodDef that
+ // Python expects.
+ struct TMethodDef {
+ TString Name;
+ TModuleMethod Method;
+ TString Description;
+ int Flags;
+
+ TMethodDef(const TString& name, TModuleMethod method, const TString& descr, int flags)
+ : Name(name)
+ , Method(method)
+ , Description(descr)
+ , Flags(flags)
+ {
+ }
+
+ // The produced PyMethodDef points into Name and Description and does not own them.
+ operator PyMethodDef() const {
+ PyMethodDef cur = {Name.c_str(), Method, Flags, Description.c_str()};
+ return cur;
+ }
+ };
+
+ typedef TSimpleSharedPtr<TVector<TMethodDef>> TMethodDefVecPtr;
+ typedef TSimpleSharedPtr<TVector<PyMethodDef>> TPyMethodDefVecPtr;
+
+ TVector<TMethodDefVecPtr> Methods;
+ TVector<TPyMethodDefVecPtr> Defs;
+#if PY_MAJOR_VERSION >= 3
+ //because the md_name will leak otherwise
+ // Keeps a PyModuleDef together with the string its name field points to.
+ class TPyModuleDefWithName {
+ PyModuleDef Def;
+ TString Name;
+ public:
+ explicit TPyModuleDefWithName(TString name, TPyMethodDefVecPtr moduleDefs)
+ : Name(std::move(name))
+ {
+ // Filled in the body, after Name is initialized, because the definition points
+ // into Name. The method table is referenced, not copied.
+ Def = PyModuleDef{
+ PyModuleDef_HEAD_INIT,
+ Name.c_str(),
+ nullptr,
+ -1,
+ moduleDefs->data(),
+ nullptr, nullptr, nullptr, nullptr
+ };
+ }
+ PyModuleDef* GetDefPtr() {
+ return &Def;
+ }
+
+ };
+ TVector<TSimpleSharedPtr<TPyModuleDefWithName>> ModuleDefs;
+#endif
+
+ // Exception barrier around a module method: C++ exceptions are converted into Python
+ // errors, and a null result without a Python error is reported as an error too.
+ // Returns the method's result, or nullptr with a Python error set.
+ template <TModuleMethod method>
+ static PyObject* MethodWrapper(PyObject* obj, PyObject* args) {
+ try {
+ PyObject* res = method(obj, args);
+ if (!res && !PyErr_Occurred())
+ ythrow yexception() << "\nModule method exited with NULL, but didn't set Error.\n Options:\n -- Return correct value or None;\n -- Set python exception;\n -- Throw c++ exception.";
+ return res;
+ } catch (const std::exception& ex) {
+ // The Python exception class is chosen from the C++ exception type.
+ PyErr_SetString(TExceptionsHolder::Instance().ToPyException(ex).Get(), ex.what());
+ } catch (...) {
+ PyErr_SetString(PyExc_RuntimeError, "Unknown error occurred while trying to call module method");
+ }
+ return nullptr;
+ }
+
+ public:
+ // Returns the process-wide instance, created on first use.
+ static TModuleHolder& Instance() {
+ static TModuleHolder Holder;
+ return Holder;
+ }
+
+ // Registers initFunc as the built-in module `name`, imports that module and
+ // attaches it to `module` as the attribute `name`.
+ // When the import or the attachment fails, the Python error is left set and the
+ // function returns without attaching anything.
+ void ImportModule(TPyObjectPtr module, const char* const name, TModuleInitFunc initFunc) {
+     PyImport_AppendInittab(const_cast<char*>(name), initFunc);
+     TPyObjectPtr importedModule(PyImport_ImportModule(name), true);
+     if (!importedModule.Get())
+         return;
+     // PyModule_AddObject steals a reference on success, so it gets one of its own;
+     // handing over the holder's reference would release the module twice.
+     PyObject* attached = importedModule.RefGet();
+     if (PyModule_AddObject(module.Get(), name, attached) < 0) {
+         // On failure nothing was stolen: give the extra reference back.
+         Py_DECREF(attached);
+     }
+ }
+
+ // Adds `method`, wrapped in MethodWrapper, to the method list of the module that is
+ // currently being defined (the one the next InitModule() call will create).
+ template <TModuleMethod method>
+ void AddModuleMethod(const TString& name, const TString& descr = "") {
+ Methods.back()->push_back(TMethodDef(name, MethodWrapper<method>, descr, METH_VARARGS));
+ }
+
+ // Creates the Python module `name` from the methods added since the previous
+ // InitModule() call, then opens a fresh method list for the next module.
+ TPyObjectPtr InitModule(const TString& name) {
+ // Convert to the PyMethodDef table Python expects, terminated by an all-null entry.
+ // The table is kept in Defs because the module refers to it.
+ Defs.push_back(new TVector<PyMethodDef>(Methods.back()->begin(), Methods.back()->end()));
+ PyMethodDef blank = {nullptr, nullptr, 0, nullptr};
+ Defs.back()->push_back(blank);
+#if PY_MAJOR_VERSION >= 3
+ ModuleDefs.push_back(MakeSimpleShared<TPyModuleDefWithName>(name, Defs.back()));
+ // NOTE(review): the result of PyModule_Create is wrapped without the `true` flag
+ // used for other freshly created objects — confirm the extra reference is intended.
+ TPyObjectPtr res(PyModule_Create(ModuleDefs.back()->GetDefPtr()));
+ NPrivate::TAtExitRegistrar::Instantiate(res);
+ NPrivate::TPyBindModuleRegistrar::Instantiate();
+#else
+ TPyObjectPtr res(Py_InitModule(name.c_str(), &(Defs.back()->at(0))));
+#endif
+ Methods.push_back(new TVector<TMethodDef>);
+ return res;
+ }
+ };
+
+ // Adapts a free function `method` to the PyCFunction calling convention: converts the
+ // argument tuple to the function's parameter types, calls it and converts the result
+ // to a Python object.
+ template <typename TMethodSignature, TMethodSignature method>
+ class TModuleMethodCaller {
+ private:
+ // Not referenced by the rest of this class.
+ template <typename TResult, typename... Args>
+ struct TCaller {
+ static PyObject* Call(PyObject* args) {
+ return BuildPyObject(Apply(method, GetArguments<Args...>(args)));
+ }
+ };
+
+ // The unnamed function-pointer parameter only serves to deduce TResult and Args.
+ template <typename TResult, typename... Args>
+ static PyObject* InternalCall(TResult (*)(Args...), PyObject* args) {
+ return BuildPyObject(Apply(method, GetArguments<Args...>(args)));
+ }
+
+ public:
+ // Throws yexception when args is given but is not a tuple of exactly as many items
+ // as the function has parameters.
+ static PyObject* Call(PyObject*, PyObject* args) {
+ if (args && (!PyTuple_Check(args) || PyTuple_Size(args) != TFunctionArgs<TMethodSignature>::Length)) {
+ ythrow yexception() << "Method takes " << (size_t)(TFunctionArgs<TMethodSignature>::Length) << " arguments, " << PyTuple_Size(args) << " provided";
+ }
+
+ return InternalCall(method, args);
+ }
+ };
+
+}
diff --git a/library/cpp/pybind/pod.cpp b/library/cpp/pybind/pod.cpp
new file mode 100644
index 0000000000..3cf030e537
--- /dev/null
+++ b/library/cpp/pybind/pod.cpp
@@ -0,0 +1,18 @@
+#include "pod.h"
+
+namespace NPyBind {
+     // Getter that resolves any attribute name by looking it up in the TPOD itself.
+     class TPODAttrGetter: public TBaseAttrGetter<TPOD> {
+     public:
+         // Writes the looked-up value to `res`; reports success only for a non-null result.
+         bool GetAttr(PyObject*, const TPOD& self, const TString& attr, PyObject*& res) const override {
+             PyObject* const found = self.GetAttr(attr.c_str());
+             res = found;
+             return found != nullptr;
+         }
+     };
+
+     // Registers the type as "TPOD" and installs TPODAttrGetter under the empty
+     // attribute name (presumably a fallback slot — confirm against attr.h).
+     TPODTraits::TPODTraits()
+         : MyParent("TPOD", "simple struct")
+     {
+         AddGetter("", new TPODAttrGetter);
+     }
+
+}
diff --git a/library/cpp/pybind/pod.h b/library/cpp/pybind/pod.h
new file mode 100644
index 0000000000..90165fdbec
--- /dev/null
+++ b/library/cpp/pybind/pod.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "attr.h"
+#include "typedesc.h"
+
+namespace NPyBind {
+     // Attribute bag backed by a Python dict held in Dict.
+     struct TPOD {
+         TPyObjectPtr Dict;
+
+         // Dict is built from the reference returned by PyDict_New() with the
+         // "unref" flag set, so no extra reference is kept on top of it.
+         TPOD()
+             : Dict(PyDict_New(), true)
+         {
+         }
+
+         // Stores `value` under `name`; true when PyDict_SetItemString returns 0.
+         bool SetAttr(const char* name, PyObject* value) {
+             const int status = PyDict_SetItemString(Dict.Get(), name, value);
+             return status == 0;
+         }
+
+         // Returns the entry for `name` after Py_XINCREF, or nullptr when the
+         // lookup yields nothing.
+         PyObject* GetAttr(const char* name) const {
+             PyObject* const item = PyDict_GetItemString(Dict.Get(), name);
+             Py_XINCREF(item);
+             return item;
+         }
+     };
+
+     // Python type description for TPOD; the holder and the object are the same type.
+     class TPODTraits: public NPyBind::TPythonType<TPOD, TPOD, TPODTraits> {
+     private:
+         using MyParent = TPythonType<TPOD, TPOD, TPODTraits>;
+         friend class TPythonType<TPOD, TPOD, TPODTraits>;
+         TPODTraits();
+
+     public:
+         // The holder is the object, so its address is returned directly.
+         static TPOD* GetObject(TPOD& obj) {
+             return &obj;
+         }
+     };
+
+     // Extracts the TPOD behind `obj`; false when the cast yields nullptr.
+     template <>
+     inline bool FromPyObject<TPOD*>(PyObject* obj, TPOD*& res) {
+         res = TPODTraits::CastToObject(obj);
+         return res != nullptr;
+     }
+     // Const flavour of the conversion above.
+     template <>
+     inline bool FromPyObject<const TPOD*>(PyObject* obj, const TPOD*& res) {
+         res = TPODTraits::CastToObject(obj);
+         return res != nullptr;
+     }
+
+}
diff --git a/library/cpp/pybind/ptr.h b/library/cpp/pybind/ptr.h
new file mode 100644
index 0000000000..e136736690
--- /dev/null
+++ b/library/cpp/pybind/ptr.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <util/generic/ptr.h>
+
+namespace NPyBind {
+     // Reference-counting policy for TIntrusivePtr that forwards to the
+     // null-tolerant CPython macros Py_XINCREF / Py_XDECREF.
+     template <class T>
+     class TPythonIntrusivePtrOps {
+     public:
+         static inline void Ref(T* obj) noexcept {
+             Py_XINCREF(obj);
+         }
+
+         static inline void UnRef(T* obj) noexcept {
+             Py_XDECREF(obj);
+         }
+
+         static inline void DecRef(T* obj) noexcept {
+             Py_XDECREF(obj);
+         }
+     };
+
+     // Intrusive smart pointer over PyObject using the policy above.
+     class TPyObjectPtr: public TIntrusivePtr<PyObject, TPythonIntrusivePtrOps<PyObject>> {
+     private:
+         using TOps = TPythonIntrusivePtrOps<PyObject>;
+         using TParent = TIntrusivePtr<PyObject, TOps>;
+
+     public:
+         inline TPyObjectPtr() noexcept {
+         }
+
+         inline explicit TPyObjectPtr(PyObject* obj) noexcept
+             : TParent(obj)
+         {
+         }
+
+         // With `unref` set, one reference is dropped right after TParent(obj) is
+         // constructed — presumably so the pointer takes over a reference the
+         // caller already owns (TParent is expected to Ref on construction; confirm).
+         inline TPyObjectPtr(PyObject* obj, bool unref) noexcept
+             : TParent(obj)
+         {
+             if (unref) {
+                 TOps::UnRef(TParent::Get());
+             }
+         }
+
+         // Increments the reference count of the held object and returns the raw pointer.
+         inline PyObject* RefGet() {
+             PyObject* const raw = TParent::Get();
+             TOps::Ref(raw);
+             return raw;
+         }
+     };
+
+}
diff --git a/library/cpp/pybind/typeattrs.h b/library/cpp/pybind/typeattrs.h
new file mode 100644
index 0000000000..a906b9ec2b
--- /dev/null
+++ b/library/cpp/pybind/typeattrs.h
@@ -0,0 +1,368 @@
+#pragma once
+
+#include "ptr.h"
+#include "cast.h"
+#include "attr.h"
+#include "method.h"
+
+#include <util/generic/vector.h>
+
+namespace NPyBind {
+ template <typename TObject>
+ class TPythonTypeAttributes { // Bundles the attribute getters, attribute setters and method callers registered for one bound TObject type.
+ private:
+ TAttrGetters<TObject> AttrGetters;
+ TAttrSetters<TObject> AttrSetters;
+ TMethodCallers<TObject> MethodCallers;
+
+ class TGetAttrsNamesCaller; // nested helpers below are defined after the class and registered in InitCommonAttributes()
+ class TGetMethodsNamesCaller;
+ class TGetAllNamesCaller;
+ class TGetPropertiesNamesCaller;
+ class TDictAttrGetter;
+ class TDictAttrSetter;
+ class TGetAttributeMethodCaller;
+ class TSetAttrMethodCaller;
+ class TGetStrReprMethodCaller;
+ class TReduceMethodCaller;
+ class TBaseGetStateMethodCaller;
+ class TBaseSetStateMethodCaller;
+
+ TPythonTypeAttributes(const TPythonTypeAttributes&); // declared private and not defined in this view: copying is not available to users
+ TPythonTypeAttributes& operator=(const TPythonTypeAttributes&);
+
+ static const TSet<TString> HiddenAttrNames; // handed to AttrGetters and MethodCallers by the constructor
+
+ typedef PyObject* (*GetAttrFunction)(PyObject*, char*);
+ typedef int (*SetAttrFunction)(PyObject*, char*, PyObject*);
+ GetAttrFunction GetAttr; // forwarded to the "__getattribute__" caller
+ SetAttrFunction SetAttr; // forwarded to the "__setattr__" caller
+
+ public:
+ typedef TSimpleSharedPtr<TBaseAttrGetter<TObject>> TGetterPtr;
+ typedef TSimpleSharedPtr<TBaseAttrSetter<TObject>> TSetterPtr;
+ typedef TSimpleSharedPtr<TBaseMethodCaller<TObject>> TCallerPtr;
+
+ TPythonTypeAttributes(GetAttrFunction getAttr, SetAttrFunction setAttr) // stores the two callbacks; AttrSetters is default-constructed
+ : AttrGetters(HiddenAttrNames)
+ , MethodCallers(HiddenAttrNames)
+ , GetAttr(getAttr)
+ , SetAttr(setAttr)
+ {
+ }
+
+ void InitCommonAttributes() { // Registers the attributes and methods every bound type gets.
+ // attributes
+ AddGetter("__dict__", new TDictAttrGetter(AttrGetters));
+ AddSetter("__dict__", new TDictAttrSetter(AttrSetters));
+
+ // methods
+ AddCaller("GetAttrsNames", new TGetAttrsNamesCaller(AttrGetters));
+ AddCaller("GetMethodsNames", new TGetMethodsNamesCaller(MethodCallers));
+ AddCaller("GetAllNames", new TGetAllNamesCaller(AttrGetters, MethodCallers));
+ AddCaller("GetPropertiesNames", new TGetPropertiesNamesCaller(MethodCallers));
+ AddCaller("__getattribute__", new TGetAttributeMethodCaller(GetAttr));
+ AddCaller("__setattr__", new TSetAttrMethodCaller(SetAttr));
+ AddCaller("__str__", new TGetStrReprMethodCaller("__str__")); // same caller class serves both __str__ and __repr__
+ AddCaller("__repr__", new TGetStrReprMethodCaller("__repr__"));
+ AddCaller("__reduce_ex__", new TReduceMethodCaller); // same caller class serves both __reduce_ex__ and __reduce__
+ AddCaller("__reduce__", new TReduceMethodCaller);
+ AddCaller("__getstate__", new TBaseGetStateMethodCaller);
+ AddCaller("__setstate__", new TBaseSetStateMethodCaller);
+
+ // generics
+ AddGetter("__class__", new TGenericAttrGetter<TObject>("__class__"));
+ AddGetter("__doc__", new TGenericAttrGetter<TObject>("__doc__"));
+ AddCaller("__sizeof__", new TGenericMethodCaller<TObject>("__sizeof__"));
+ AddCaller("__hash__", new TGenericMethodCaller<TObject>("__hash__"));
+ }
+
+ void AddGetter(const TString& attr, TGetterPtr getter) { // forwards to AttrGetters
+ AttrGetters.AddGetter(attr, getter);
+ }
+
+ void AddSetter(const TString& attr, TSetterPtr setter) { // forwards to AttrSetters
+ AttrSetters.AddSetter(attr, setter);
+ }
+
+ void AddCaller(const TString& name, TCallerPtr caller) { // forwards to MethodCallers
+ MethodCallers.AddCaller(name, caller);
+ }
+
+ const TAttrGetters<TObject>& GetAttrGetters() const {
+ return AttrGetters;
+ }
+
+ TAttrSetters<TObject>& GetAttrSetters() { // non-const accessor, unlike the getters and callers accessors
+ return AttrSetters;
+ }
+
+ const TMethodCallers<TObject>& GetMethodCallers() const {
+ return MethodCallers;
+ }
+
+ const TSet<TString>& GetHiddenAttrs() const {
+ return HiddenAttrNames;
+ }
+ };
+
+ // Method caller registered as "GetAttrsNames": returns the names reported by
+ // the attribute getters for the given object.
+ template <typename TObjType>
+ class TPythonTypeAttributes<TObjType>::TGetAttrsNamesCaller: public TBaseMethodCaller<TObjType> {
+ private:
+     const TAttrGetters<TObjType>& AttrGetters;
+
+ public:
+     TGetAttrsNamesCaller(const TAttrGetters<TObjType>& getters)
+         : AttrGetters(getters)
+     {
+     }
+
+     // Throws yexception when ExtractArgs rejects `args` (no arguments are
+     // expected); otherwise stores the converted name list in `res`.
+     bool CallMethod(PyObject* owner, TObjType* self, PyObject* args, PyObject*, PyObject*& res) const override {
+         if (!ExtractArgs(args)) {
+             ythrow yexception() << "Could not parse args for GetAttrsNames() - it should be none";
+         }
+
+         TVector<TString> collected;
+         AttrGetters.GetAttrsNames(owner, *self, collected);
+         res = BuildPyObject(collected);
+         return res != nullptr;
+     }
+ };
+
+ // Method caller registered as "GetMethodsNames": returns the names reported by
+ // the method callers for the given object.
+ template <typename TObjType>
+ class TPythonTypeAttributes<TObjType>::TGetMethodsNamesCaller: public TBaseMethodCaller<TObjType> {
+ private:
+     const TMethodCallers<TObjType>& MethodCallers;
+
+ public:
+     TGetMethodsNamesCaller(const TMethodCallers<TObjType>& callers)
+         : MethodCallers(callers)
+     {
+     }
+
+     // Throws yexception when ExtractArgs rejects `args` (no arguments are
+     // expected); otherwise stores the converted name list in `res`.
+     bool CallMethod(PyObject* owner, TObjType* self, PyObject* args, PyObject*, PyObject*& res) const override {
+         if (!ExtractArgs(args)) {
+             ythrow yexception() << "Could not parse args for GetMethodsNames() - it should be none";
+         }
+
+         TVector<TString> collected;
+         MethodCallers.GetMethodsNames(owner, self, collected);
+         res = BuildPyObject(collected);
+         return res != nullptr;
+     }
+ };
+
+ // Method caller registered as "GetAllNames": attribute names first, then
+ // method names, collected into a single list.
+ template <typename TObjType>
+ class TPythonTypeAttributes<TObjType>::TGetAllNamesCaller: public TBaseMethodCaller<TObjType> {
+ private:
+     const TAttrGetters<TObjType>& AttrGetters;
+     const TMethodCallers<TObjType>& MethodCallers;
+
+ public:
+     TGetAllNamesCaller(const TAttrGetters<TObjType>& getters,
+                        const TMethodCallers<TObjType>& callers)
+         : AttrGetters(getters)
+         , MethodCallers(callers)
+     {
+     }
+
+     // Throws yexception when ExtractArgs rejects `args` (no arguments are
+     // expected); otherwise stores the converted name list in `res`.
+     bool CallMethod(PyObject* owner, TObjType* self, PyObject* args, PyObject*, PyObject*& res) const override {
+         if (!ExtractArgs(args)) {
+             ythrow yexception() << "Could not parse args for GetAllNames() - it should be none";
+         }
+
+         TVector<TString> collected;
+         AttrGetters.GetAttrsNames(owner, *self, collected);
+         MethodCallers.GetMethodsNames(owner, self, collected);
+         res = BuildPyObject(collected);
+         return res != nullptr;
+     }
+ };
+
+ // Method caller registered as "GetPropertiesNames": returns the property names
+ // reported by the method callers for the given object.
+ template <typename TObjType>
+ class TPythonTypeAttributes<TObjType>::TGetPropertiesNamesCaller: public TBaseMethodCaller<TObjType> {
+ private:
+     const TMethodCallers<TObjType>& MethodCallers;
+
+ public:
+     TGetPropertiesNamesCaller(const TMethodCallers<TObjType>& callers)
+         : MethodCallers(callers)
+     {
+     }
+
+     // Unlike the sibling name callers, rejected arguments yield `false`
+     // instead of an exception.
+     bool CallMethod(PyObject* obj, TObjType* self, PyObject* args, PyObject*, PyObject*& res) const override {
+         if (!ExtractArgs(args)) {
+             return false;
+         }
+
+         TVector<TString> collected;
+         MethodCallers.GetPropertiesNames(obj, self, collected);
+         res = BuildPyObject(collected);
+         return res != nullptr;
+     }
+ };
+
+ // Getter registered for "__dict__": converts the name -> value map produced by
+ // the attribute getters into a Python object.
+ template <typename TObjType>
+ class TPythonTypeAttributes<TObjType>::TDictAttrGetter: public TBaseAttrGetter<TObjType> {
+ private:
+     TAttrGetters<TObjType>& AttrGetters;
+
+ public:
+     TDictAttrGetter(TAttrGetters<TObjType>& getters)
+         : AttrGetters(getters)
+     {
+     }
+
+     // The requested attribute name is ignored; success means BuildPyObject
+     // produced a non-null object.
+     bool GetAttr(PyObject* owner, const TObjType& self, const TString&, PyObject*& res) const override {
+         TMap<TString, PyObject*> values;
+         AttrGetters.GetAttrsDictionary(owner, self, values);
+         res = BuildPyObject(values);
+         return res != nullptr;
+     }
+ };
+
+ // Setter registered for "__dict__": parses the assigned value into a
+ // name -> value map and hands it to the attribute setters.
+ template <typename TObjType>
+ class TPythonTypeAttributes<TObjType>::TDictAttrSetter: public TBaseAttrSetter<TObjType> {
+ private:
+     TAttrSetters<TObjType>& AttrSetters;
+
+ public:
+     TDictAttrSetter(TAttrSetters<TObjType>& setters)
+         : AttrSetters(setters)
+     {
+     }
+
+     // Throws yexception when `val` cannot be converted to a map; otherwise
+     // reports whether SetAttrDictionary succeeded.
+     bool SetAttr(PyObject* owner, TObjType& self, const TString&, PyObject* val) override {
+         TMap<TString, PyObject*> parsed;
+         if (!FromPyObject(val, parsed)) {
+             ythrow yexception() << "'__dict__' should be set to dictionary";
+         }
+         return AttrSetters.SetAttrDictionary(owner, self, parsed) ? true : false;
+     }
+ };
+
+ // Method caller registered as "__getattribute__": forwards a single string
+ // argument to the stored GetAttr callback.
+ template <typename TObjType>
+ class TPythonTypeAttributes<TObjType>::TGetAttributeMethodCaller: public TBaseMethodCaller<TObjType> {
+ private:
+     GetAttrFunction GetAttr;
+
+ public:
+     TGetAttributeMethodCaller(GetAttrFunction getAttr)
+         : GetAttr(getAttr)
+     {
+     }
+
+     // Throws yexception when `args` is not exactly one string.
+     bool CallMethod(PyObject* owner, TObjType*, PyObject* args, PyObject*, PyObject*& res) const override {
+         TString requested;
+         if (!ExtractArgs(args, requested)) {
+             ythrow yexception() << "Could not parse args for '__getattribute__' - it should be one string";
+         }
+
+         // The callback takes a mutable char*, hence the const_cast.
+         res = GetAttr(owner, const_cast<char*>(requested.c_str()));
+         // A null result means the error has already been set by the callback.
+         return res != nullptr;
+     }
+ };
+
+ // Method caller registered as "__setattr__": forwards (name, value) to the
+ // stored SetAttr callback and yields None on success.
+ template <typename TObjType>
+ class TPythonTypeAttributes<TObjType>::TSetAttrMethodCaller: public TBaseMethodCaller<TObjType> {
+ private:
+     SetAttrFunction SetAttr;
+
+ public:
+     TSetAttrMethodCaller(SetAttrFunction setAttr)
+         : SetAttr(setAttr)
+     {
+     }
+
+     // Throws yexception when `args` is not (string, value). Returns false,
+     // leaving `res` untouched, when the callback reports failure (-1).
+     bool CallMethod(PyObject* owner, TObjType*, PyObject* args, PyObject*, PyObject*& res) const override {
+         TString attrName;
+         TPyObjectPtr value;
+         if (!ExtractArgs(args, attrName, value))
+             ythrow yexception() << "Could not parse args for '__setattr__' - it should be one string and value";
+         // The callback takes a mutable char*, hence the const_cast.
+         if (-1 == SetAttr(owner, const_cast<char*>(attrName.c_str()), value.Get()))
+             // Error already set. The None reference is taken only after success,
+             // so the failure path no longer leaks one.
+             return false;
+         Py_INCREF(Py_None);
+         res = Py_None;
+         return true;
+     }
+ };
+
+ // Method caller registered for "__str__" and "__repr__": produces the string
+ // "<module.name object>" built from the owner's type.
+ template <typename TObjType>
+ class TPythonTypeAttributes<TObjType>::TGetStrReprMethodCaller: public TBaseMethodCaller<TObjType> {
+ private:
+     TString MethodName;
+
+ private:
+     // Returns "<__module__>.<__name__>" of obj's type. Throws yexception when
+     // the type or either attribute cannot be obtained or converted to a string.
+     const TString GetFullName(PyObject* obj) const {
+         TString module, name;
+         TPyObjectPtr type(PyObject_Type(obj), true);
+         if (!type.Get())
+             ythrow yexception() << "Could not get name of object";
+
+         // PyObject_GetAttrString returns a new reference; holding it in a
+         // TPyObjectPtr releases it on every path (it used to be leaked) and
+         // lets a failed lookup be detected before conversion.
+         TPyObjectPtr moduleAttr(PyObject_GetAttrString(type.Get(), "__module__"), true);
+         if (!moduleAttr.Get() || !FromPyObject(moduleAttr.Get(), module))
+             ythrow yexception() << "Could not get name of object";
+
+         TPyObjectPtr nameAttr(PyObject_GetAttrString(type.Get(), "__name__"), true);
+         if (!nameAttr.Get() || !FromPyObject(nameAttr.Get(), name))
+             ythrow yexception() << "Could not get name of object";
+
+         return module + "." + name;
+     }
+
+ public:
+     // `methodName` is used only in the argument-parsing error message.
+     TGetStrReprMethodCaller(const TString& methodName)
+         : MethodName(methodName)
+     {
+     }
+
+     // A null `args` is accepted; a non-null one must pass ExtractArgs with no
+     // outputs, otherwise yexception is thrown.
+     bool CallMethod(PyObject* owner, TObjType*, PyObject* args, PyObject*, PyObject*& res) const override {
+         if (args && !ExtractArgs(args))
+             ythrow yexception() << "Could not parse args for '" << MethodName << "'";
+         TString message = TString("<") + GetFullName(owner) + " object>";
+         res = ReturnString(message);
+         return (res != nullptr);
+     }
+ };
+
+ // Method caller registered for "__reduce__" and "__reduce_ex__": builds the
+ // 3-tuple (reconstructor, (class, property names), state).
+ template <typename TObjType>
+ class TPythonTypeAttributes<TObjType>::TReduceMethodCaller: public TBaseMethodCaller<TObjType> {
+ public:
+     // Returns false as soon as any step yields nullptr, so a failed import or
+     // lookup is reported instead of being passed on to the next C-API call
+     // (which would dereference a null pointer). The failing call is expected
+     // to have set the Python error.
+     bool CallMethod(PyObject* owner, TObjType*, PyObject*, PyObject*, PyObject*& res) const override {
+         TPyObjectPtr tuple(PyTuple_New(3), true);
+         if (!tuple.Get())
+             return false;
+
+         // First component: reconstructor
+         TPyObjectPtr pybindName(BuildPyObject("pybind"), true);
+         if (!pybindName.Get())
+             return false;
+         TPyObjectPtr mainModule(PyImport_Import(pybindName.Get()), true);
+         if (!mainModule.Get())
+             return false;
+         TPyObjectPtr recName(BuildPyObject("PyBindObjectReconstructor"), true);
+         if (!recName.Get())
+             return false;
+         TPyObjectPtr reconstructor(PyObject_GetAttr(mainModule.Get(), recName.Get()), true);
+         if (!reconstructor.Get())
+             return false;
+
+         // Second component: arguments to rebuild object
+         TPyObjectPtr arguments(PyTuple_New(2), true);
+         if (!arguments.Get())
+             return false;
+         TPyObjectPtr cl(PyObject_GetAttrString(owner, "__class__"), true);
+         if (!cl.Get())
+             return false;
+         TPyObjectPtr props(PyObject_CallMethod(owner, const_cast<char*>("GetPropertiesNames"), nullptr), true);
+         if (!props.Get())
+             return false;
+
+         // Third component: state to fill new object
+         TPyObjectPtr state(PyObject_CallMethod(owner, const_cast<char*>("__getstate__"), nullptr), true);
+         if (!state.Get())
+             return false;
+
+         // PyTuple_SET_ITEM takes over one reference, so each item is handed
+         // over through RefGet() while the local pointer keeps its own.
+         PyTuple_SET_ITEM(arguments.Get(), 0, cl.RefGet());
+         PyTuple_SET_ITEM(arguments.Get(), 1, props.RefGet());
+
+         PyTuple_SET_ITEM(tuple.Get(), 0, reconstructor.RefGet());
+         PyTuple_SET_ITEM(tuple.Get(), 1, arguments.RefGet());
+         PyTuple_SET_ITEM(tuple.Get(), 2, state.RefGet());
+         res = tuple.RefGet();
+         return (res != nullptr);
+     }
+ };
+
+ template <typename TObjType>
+ class TPythonTypeAttributes<TObjType>::TBaseGetStateMethodCaller: public TGetStateCaller<TObjType, TObjType> { // caller registered as "__getstate__" in InitCommonAttributes()
+ public:
+ void GetAttrsDictionary(PyObject* obj, TObjType* self, TMap<TString, TPyObjectPtr>& dict) const override { // fills `dict` by delegating to the base class
+ this->GetStandartAttrsDictionary(obj, self, dict); // "Standart" is the spelling of the inherited method name
+ }
+ };
+
+ template <typename TObjType>
+ class TPythonTypeAttributes<TObjType>::TBaseSetStateMethodCaller: public TSetStateCaller<TObjType, TObjType> { // caller registered as "__setstate__" in InitCommonAttributes()
+ public:
+ void SetAttrsDictionary(PyObject* obj, TObjType* self, TMap<TString, TPyObjectPtr>& dict) const override { // applies `dict` by delegating to the base class
+ this->SetStandartAttrsDictionary(obj, self, dict); // "Standart" is the spelling of the inherited method name
+ }
+ };
+
+ // Names collected into TPythonTypeAttributes<T>::HiddenAttrNames, the set that
+ // is handed to the attribute getters and method callers on construction.
+ static const char* HiddenAttrStrings[] = {
+     "__dict__",
+     "__class__",
+     "__dir__",
+     "__delattr__",
+     "__doc__",
+     "__format__",
+     "__getattribute__",
+     "__hash__",
+     "__init__",
+     "__new__",
+     "__reduce__",
+     "__reduce_ex__",
+     "__repr__",
+     "__setattr__",
+     "__sizeof__",
+     "__str__",
+     "__subclasshook__",
+     "__getstate__",
+     "__setstate__",
+     "GetAttrsNames",
+     "GetMethodsNames",
+     "GetAllNames",
+     "GetPropertiesNames"};
+
+ // One set per instantiation, filled from the whole array above.
+ template <typename T>
+ const TSet<TString> TPythonTypeAttributes<T>::HiddenAttrNames(std::begin(HiddenAttrStrings), std::end(HiddenAttrStrings));
+
+}
diff --git a/library/cpp/pybind/typedesc.cpp b/library/cpp/pybind/typedesc.cpp
new file mode 100644
index 0000000000..75f39fd126
--- /dev/null
+++ b/library/cpp/pybind/typedesc.cpp
@@ -0,0 +1,79 @@
+#include "typedesc.h"
+
+#include <util/generic/singleton.h>
+
+// Defines, via PyRun_SimpleString, the Python-side JSON helpers used by pybind:
+//   PyBindEncoder      - json.JSONEncoder subclass that serializes objects through
+//                        __getstate__ / __dict__ and tags them with
+//                        __name__ / __module__ / __properties__;
+//   PyBindObjectHook   - object_hook that rebuilds such tagged dictionaries;
+//   json_dump(s) / json_load(s) - wrappers that plug the two into the json module.
+// The embedded scripts are whitespace-sensitive: each nesting level is four spaces.
+// NOTE(review): the return codes of PyRun_SimpleString are not inspected.
+static void RegisterJSONBridgeImpl() {
+    PyRun_SimpleString("import json\n"
+                       "class PyBindEncoder(json.JSONEncoder):\n"
+                       "    def default(self, obj):\n"
+                       "        if isinstance(obj, bytes):\n"
+                       "            try:\n"
+                       "                return obj.decode()\n"
+                       "            except UnicodeDecodeError:\n"
+                       "                return obj.hex()\n"
+                       "        dct = None\n"
+                       "        if hasattr(obj, '__getstate__'):\n"
+                       "            dct = obj.__getstate__()\n"
+                       "        elif hasattr(obj, '__dict__'):\n"
+                       "            dct = obj.__dict__\n"
+                       "        if dct is None:\n"
+                       "            return json.JSONEncoder.default(self, obj)\n"
+                       // Work on a copy: the bookkeeping keys added below must not
+                       // end up in the live __dict__ of the object being serialized.
+                       "        dct = dict(dct)\n"
+                       "        if hasattr(obj, '__class__'):\n"
+                       "            if hasattr(obj.__class__, '__name__'):\n"
+                       "                dct['__name__'] = obj.__class__.__name__\n"
+                       "            if hasattr(obj.__class__, '__module__'):\n"
+                       "                dct['__module__'] = obj.__class__.__module__\n"
+                       "        if hasattr(obj, 'GetPropertiesNames'):\n"
+                       "            dct['__properties__'] = obj.GetPropertiesNames()\n"
+                       "        return dct");
+
+    // importlib.import_module returns the named (sub)module itself, whereas
+    // __import__('a.b') returns the top-level package 'a', which made the class
+    // lookup fail for types living in dotted modules.
+    PyRun_SimpleString("import importlib\n"
+                       "def PyBindObjectHook(dct):\n"
+                       "    if '__name__' in dct:\n"
+                       "        name = dct['__name__']\n"
+                       "        module = dct['__module__']\n"
+                       "        del dct['__name__']\n"
+                       "        del dct['__module__']\n"
+                       "        cls = getattr(importlib.import_module(module), name)\n"
+                       "        if '__properties__' in dct:\n"
+                       "            props = dct['__properties__']\n"
+                       "            del dct['__properties__']\n"
+                       "            if len(props) == 0:\n"
+                       "                return dct\n"
+                       "            instance = cls(__properties__ = props)\n"
+                       "        else:\n"
+                       "            instance = cls()\n"
+                       "        if hasattr(instance, '__setstate__'):\n"
+                       "            instance.__setstate__(dct)\n"
+                       "        elif hasattr(instance, '__dict__'):\n"
+                       "            instance.__dict__ = dct\n"
+                       "        else:\n"
+                       "            return dct\n"
+                       "        return instance\n"
+                       "    return dct");
+
+    PyRun_SimpleString("def json_dump(*args, **kwargs):\n"
+                       "    kwargs['cls'] = PyBindEncoder\n"
+                       "    return json.dump(*args, **kwargs)\n"
+                       "def json_dumps(*args, **kwargs):\n"
+                       "    kwargs['cls'] = PyBindEncoder\n"
+                       "    return json.dumps(*args, **kwargs)");
+
+    PyRun_SimpleString("def json_load(*args, **kwargs):\n"
+                       "    kwargs['object_hook'] = PyBindObjectHook\n"
+                       "    return json.load(*args, **kwargs)\n"
+                       "def json_loads(*args, **kwargs):\n"
+                       "    kwargs['object_hook'] = PyBindObjectHook\n"
+                       "    return json.loads(*args, **kwargs)");
+}
+
+namespace {
+ struct TJSONBridge { // Helper whose construction runs the JSON bridge registration.
+ TJSONBridge() {
+ RegisterJSONBridgeImpl(); // executes the embedded Python definitions
+ }
+ };
+}
+
+void NPyBind::RegisterJSONBridge() { // Public entry point for installing the JSON helpers.
+ Singleton<TJSONBridge>(); // presumably constructs TJSONBridge only on the first call, so registration runs once — confirm against util/generic/singleton.h
+}
diff --git a/library/cpp/pybind/typedesc.h b/library/cpp/pybind/typedesc.h
new file mode 100644
index 0000000000..57eacb0f3a
--- /dev/null
+++ b/library/cpp/pybind/typedesc.h
@@ -0,0 +1,545 @@
+#pragma once
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <structmember.h>
+
+#include "typeattrs.h"
+#include "exceptions.h"
+#include "module.h"
+
+namespace NPyBind {
+ void RegisterJSONBridge(); // called from the TPythonType constructor below
+
+ namespace NPrivate {
+ template <typename>
+ class TUnboundClosureHolder; // forward declaration: used by TPythonType::FillClassDict before its definition further down
+ template <typename>
+ class TUnboundClosure; // forward declaration: used by TPythonType::FillClassDict before its definition further down
+ }
+
+ // TTraits should be derived from TPythonType
+ template <typename TObjectHolder, typename TObject, typename TTraits>
+ class TPythonType { // Describes one Python type whose instances wrap a TObjectHolder*; TTraits supplies GetObject() and may shadow DoInitObject()/DoInitPureObject().
+ private:
+ TPythonType(const TPythonType&); // declared private and not defined in this view: copying is not available to users
+ TPythonType& operator=(const TPythonType&);
+
+ private:
+ typedef typename TPythonTypeAttributes<TObject>::TGetterPtr TGetterPtr;
+ typedef typename TPythonTypeAttributes<TObject>::TSetterPtr TSetterPtr;
+ typedef typename TPythonTypeAttributes<TObject>::TCallerPtr TCallerPtr;
+
+ struct TProxy { // memory layout of an instance: the Python object header followed by the holder pointer
+ PyObject_HEAD
+ TObjectHolder* Holder; // deleted in DeallocObject
+ };
+
+ static PyTypeObject PyType; // one type object per template instantiation
+ static PyMappingMethods MappingMethods;
+ static PyObject* PyTypeObjPtr; // PyType viewed as a PyObject*
+ protected:
+ static PyTypeObject* GetPyTypePtr() {
+ return &PyType;
+ }
+ private:
+
+ TPythonTypeAttributes<TObject> Attributes;
+
+ static int InitObject(PyObject* s, PyObject* args, PyObject* kwargs) { // tp_init: returns 0 on success, -1 with a Python error set otherwise
+ try {
+ TProxy* self = reinterpret_cast<TProxy*>(s);
+ auto str = NameFromString("__properties__");
+ if (kwargs && PyDict_Check(kwargs) && PyDict_Contains(kwargs, str.Get())) { // NOTE(review): PyDict_Contains can return -1, which is treated as "present" here
+ TPyObjectPtr props(PyDict_GetItem(kwargs, str.Get()));
+ TVector<TString> properties;
+ FromPyObject(props.Get(), properties); // NOTE(review): conversion result is ignored
+ self->Holder = TTraits::DoInitPureObject(properties);
+ } else {
+ self->Holder = (args || kwargs) ? TTraits::DoInitObject(args, kwargs) : nullptr;
+ }
+ if (PyErr_Occurred())
+ return -1;
+ return 0;
+ } catch (const std::exception& ex) {
+ PyErr_SetString(TExceptionsHolder::Instance().ToPyException(ex).Get(), ex.what()); // translate the C++ exception into a Python one
+ } catch (...) {
+ PyErr_SetString(PyExc_RuntimeError, "Unknown error occurred while trying to init object");
+ }
+ return -1;
+ }
+
+ static void DeallocObject(TProxy* self) { // tp_dealloc: destroys the holder, then frees the Python object
+ delete self->Holder;
+ Py_TYPE(self)->tp_free(reinterpret_cast<PyObject*>(self));
+ }
+
+ static PyObject* GetObjectAttr(PyObject* pyObj, char* attr);
+ static int SetObjectAttr(PyObject* pyObj, char* attr, PyObject* value);
+ static PyObject* GetStr(PyObject*);
+ static PyObject* GetRepr(PyObject*);
+ static PyObject* GetIter(PyObject*);
+ static PyObject* GetNext(PyObject*);
+
+ // Fill class __dict__ with functions to make sure methods names will get to dir()
+ void FillClassDict() const {
+ TVector<TString> names;
+ Attributes.GetMethodCallers().GetAllMethodsNames(names);
+ for (const auto& name : names) {
+ TPyObjectPtr callable = NPrivate::TUnboundClosure<TObject>::Instance().CreatePyObject(new NPrivate::TUnboundClosureHolder<TObject>(&PyType, name)); // the holder receives the address of the static PyType
+ PyDict_SetItemString(PyType.tp_dict, name.c_str(), callable.Get()); // NOTE(review): return value is not checked
+ }
+ }
+
+ void InitCommonAttributes() { // runs its body once per template instantiation (guarded by the static flag)
+ static bool was = false;
+ if (was)
+ return;
+ was = true;
+ Attributes.InitCommonAttributes();
+ FillClassDict();
+ }
+
+ protected:
+ TPythonType(const char* pyTypeName, const char* typeDescr, PyTypeObject* parentType = nullptr) // fills in name/doc/base of the static PyType and readies it
+ : Attributes(GetObjectAttr, SetObjectAttr)
+ {
+ PyType.tp_name = pyTypeName; // pointers are stored as-is, so the strings must outlive the type
+ PyType.tp_doc = typeDescr;
+ Py_INCREF(PyTypeObjPtr);
+ if (parentType) {
+ Py_INCREF(parentType);
+ PyType.tp_base = parentType;
+ }
+ PyType_Ready(&PyType); // NOTE(review): return value is not checked
+
+ TExceptionsHolder::Instance();
+ RegisterJSONBridge();
+
+ }
+
+ ~TPythonType() {
+ }
+
+ static TObjectHolder* DoInitObject(PyObject*, PyObject*) { // default used by InitObject via TTraits:: unless TTraits declares its own
+ return nullptr;
+ }
+
+ static TObjectHolder* DoInitPureObject(const TVector<TString>&) { // default used by InitObject via TTraits:: unless TTraits declares its own
+ return nullptr;
+ }
+
+ static void SetClosure(PyObject* (*call)(PyObject*, PyObject*, PyObject*)) { // installs tp_call, making instances callable
+ PyType.tp_call = call;
+ }
+
+ public:
+ void AddGetter(const TString& attr, TGetterPtr getter) {
+ Attributes.AddGetter(attr, getter);
+ }
+
+ void AddSetter(const TString& attr, TSetterPtr setter) {
+ Attributes.AddSetter(attr, setter);
+ }
+
+ void AddCaller(const TString& name, TCallerPtr caller) { // registers the caller and wires the iterator slots for the two special names below
+ Attributes.AddCaller(name, caller);
+ if (name == "__iter__") {
+ PyType.tp_iter = GetIter;
+ }
+ if (name == "next") {
+ PyType.tp_iternext = GetNext;
+ }
+ }
+
+ void SetIter(getiterfunc tp_iter) {
+ PyType.tp_iter = tp_iter;
+ }
+
+ void SetIterNext(iternextfunc tp_iternext) {
+ PyType.tp_iternext = tp_iternext;
+ }
+
+ void SetDestructor(destructor tp_dealloc) { // replaces the default DeallocObject
+ PyType.tp_dealloc = tp_dealloc;
+ }
+
+ void SetLengthFunction(lenfunc mp_length) { // tp_as_mapping points at the static MappingMethods (see the PyType initializer)
+ PyType.tp_as_mapping->mp_length = mp_length;
+ }
+
+ void SetSubscriptFunction(binaryfunc mp_subscript) {
+ PyType.tp_as_mapping->mp_subscript = mp_subscript;
+ }
+
+ void SetAssSubscriptFunction(objobjargproc mp_ass_subscript) {
+ PyType.tp_as_mapping->mp_ass_subscript = mp_ass_subscript;
+ }
+
+ typedef TObject TObjectType;
+
+ static TPythonType& Instance() { // returns the function-local static TTraits, making sure the common attributes are registered
+ static TTraits Traits;
+ Traits.InitCommonAttributes();
+ return Traits;
+ }
+
+ void Register(PyObject* module, const char* typeName) { // adds the type object to `module` under `typeName`; throws yexception on failure
+ Py_INCREF(PyTypeObjPtr); // extra reference for PyModule_AddObject to take over
+ if (0 != PyModule_AddObject(module, typeName, PyTypeObjPtr))
+ ythrow yexception() << "can't register type \"" << typeName << "\"";
+ }
+
+ void Register(PyObject* module, const char* objName, TObjectHolder* hld) { // wraps `hld` in a new instance and adds it to `module` under `objName`; throws yexception on failure
+ if (0 != PyModule_AddObject(module, objName, CreatePyObject(hld).RefGet()))
+ ythrow yexception() << "can't register object \"" << objName << "\"";
+ }
+
+ void Register(TPyObjectPtr module, const TString& typeName) { // convenience overload
+ Register(module.Get(), typeName.c_str());
+ }
+
+ void Register(TPyObjectPtr module, const TString& objName, TObjectHolder* hld) { // convenience overload
+ Register(module.Get(), objName.c_str(), hld);
+ }
+
+ static TObjectHolder* CastToObjectHolder(PyObject* obj) { // returns the holder stored in `obj`, or nullptr when `obj` is not an instance of this type
+ // Call Instance() to make sure PyTypeObjPtr is already created at this point
+ Instance();
+ if (!PyObject_IsInstance(obj, PyTypeObjPtr)) // NOTE(review): an error result (-1) is not distinguished from "is instance"
+ return nullptr;
+ TProxy* prx = reinterpret_cast<TProxy*>(obj);
+ return prx ? prx->Holder : nullptr;
+ }
+
+ static TObject* CastToObject(PyObject* obj) { // holder lookup followed by TTraits::GetObject; nullptr when there is no holder
+ TObjectHolder* hld = CastToObjectHolder(obj);
+ return hld ? TTraits::GetObject(*hld) : nullptr;
+ }
+
+ static TPyObjectPtr CreatePyObject(TObjectHolder* hld) { // allocates a new instance and stores `hld` in it; the instance deletes it in DeallocObject
+ TPyObjectPtr r(_PyObject_New(&PyType), true);
+ TProxy* prx = reinterpret_cast<TProxy*>(r.Get());
+ if (prx)
+ prx->Holder = hld;
+ return r;
+ }
+ };
+
+ template <typename TObjectHolder, typename TObject, typename TTraits>
+ PyMappingMethods TPythonType<TObjectHolder, TObject, TTraits>::MappingMethods = {nullptr, nullptr, nullptr}; // all three mapping slots start empty; the Set*Function members of TPythonType fill them in
+
+ template <typename TObjectHolder, typename TObject, typename TTraits>
+ PyTypeObject TPythonType<TObjectHolder, TObject, TTraits>::PyType = { // positional initializer: entries follow the field order of CPython's PyTypeObject, which varies by version (hence the #if blocks)
+ PyVarObject_HEAD_INIT(nullptr, 0) "", sizeof(TProxy), 0, (destructor)&DeallocObject // name placeholder (set by the constructor), instance size, item size, deallocator
+#if PY_VERSION_HEX < 0x030800b4
+ , nullptr, /*tp_print*/
+#endif
+#if PY_VERSION_HEX >= 0x030800b4
+ , 0, /*tp_vectorcall_offset*/
+#endif
+ &GetObjectAttr, &SetObjectAttr, nullptr, &GetRepr, nullptr, nullptr, &MappingMethods, nullptr, nullptr, &GetStr, nullptr, nullptr, nullptr, // attribute access, repr, mapping table and str handlers; the remaining slots are empty
+ Py_TPFLAGS_DEFAULT, "", nullptr, nullptr, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0, InitObject, PyType_GenericAlloc, PyType_GenericNew, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0 // flags, doc placeholder (set by the constructor), ..., init / alloc / new, trailing slots empty
+#if PY_MAJOR_VERSION >= 3
+ , nullptr
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ , nullptr /*tp_vectorcall*/
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ , nullptr /*tp_print*/
+#endif
+#if PY_VERSION_HEX >= 0x030C0000
+ , 0 /*tp_watched*/
+#endif
+ };
+
+ template <typename TObjectHolder, typename TObject, typename TTraits>
+ PyObject* TPythonType<TObjectHolder, TObject, TTraits>::PyTypeObjPtr = // the static PyType reinterpreted as a PyObject*, for C-API calls that take an object pointer
+ reinterpret_cast<PyObject*>(&TPythonType<TObjectHolder, TObject, TTraits>::PyType);
+
+ namespace NPrivate {
+ // Payload of an unbound-closure Python object: remembers the type object a
+ // method belongs to and the method's name.
+ template <typename TObject>
+ class TUnboundClosureHolder {
+ private:
+     // Non-owning. The pointer passed in this file is the address of a
+     // TPythonType's static PyType, which must never be handed to `delete`;
+     // an owning THolder<PyTypeObject> would do exactly that on destruction.
+     PyTypeObject* Holder;
+     TString Method;
+
+ public:
+     TUnboundClosureHolder(PyTypeObject* ptr, const TString& meth)
+         : Holder(ptr)
+         , Method(meth)
+     {
+     }
+
+     PyTypeObject* GetObject() const {
+         return Holder;
+     }
+
+     const TString GetMethod() const {
+         return Method;
+     }
+
+     // Looks up the method by name on `obj` and calls it with `args`; the
+     // keyword-arguments parameter is not used. Returns a reference for the
+     // caller to own, or nullptr when the call failed with a Python error set.
+     // Throws yexception when the attribute cannot be obtained, or when the
+     // call fails without a Python error.
+     PyObject* Call(PyObject* obj, PyObject* args, PyObject*) const {
+         TPyObjectPtr callable(PyObject_GetAttrString(obj, Method.c_str()), true);
+         if (!callable.Get())
+             ythrow yexception() << "PyBind can't call method '" << Method << "'";
+         TPyObjectPtr res(PyObject_CallObject(callable.Get(), args), true);
+         if (!res.Get() && !PyErr_Occurred())
+             ythrow yexception() << "PyBind can't call method '" << Method << "'";
+         return res.RefGet();
+     }
+ };
+
+ // Python type of the callables placed into a bound class' __dict__: calling
+ // one with (obj, *rest) looks the stored method name up on `obj` and invokes
+ // it with `rest`.
+ template <typename TObject>
+ class TUnboundClosure: public NPyBind::TPythonType<TUnboundClosureHolder<TObject>, PyTypeObject, TUnboundClosure<TObject>> {
+ private:
+     typedef class NPyBind::TPythonType<TUnboundClosureHolder<TObject>, PyTypeObject, TUnboundClosure<TObject>> TParent;
+     friend class NPyBind::TPythonType<TUnboundClosureHolder<TObject>, PyTypeObject, TUnboundClosure<TObject>>;
+
+     // Produces "<unbound method Type.method>" for both __repr__ and __str__.
+     class TReprMethodCaller: public TBaseMethodCaller<PyTypeObject> {
+     public:
+         bool CallMethod(PyObject* closure, PyTypeObject*, PyObject*, PyObject*, PyObject*& res) const override {
+             TUnboundClosureHolder<TObject>* hld = TParent::CastToObjectHolder(closure);
+             // The cast yields nullptr for foreign objects and for instances
+             // without a holder; fail like Call() does instead of dereferencing it.
+             if (!hld)
+                 ythrow yexception() << "Can't cast object to TypeHolder";
+             TPyObjectPtr type((PyObject*)hld->GetObject());
+
+             TString nameStr;
+             TPyObjectPtr name(PyObject_GetAttrString(type.Get(), "__name__"), true);
+             if (!name.Get() || !FromPyObject(name.Get(), nameStr))
+                 ythrow yexception() << "Could not get name of object";
+
+             TString methodName(hld->GetMethod());
+
+             TString message = "<unbound method " + nameStr + "." + methodName + ">";
+             res = ReturnString(message);
+             return (res != nullptr);
+         }
+     };
+
+ private:
+     TUnboundClosure()
+         : TParent("", "")
+     {
+         TParent::AddCaller("__repr__", new TReprMethodCaller());
+         TParent::AddCaller("__str__", new TReprMethodCaller());
+         TParent::SetClosure(&Call);
+     }
+
+     // tp_call handler. `args` must be a tuple whose first element is the
+     // object to call the method on; the remaining elements become the call
+     // arguments. C++ exceptions are converted to Python errors and nullptr
+     // is returned.
+     static PyObject* Call(PyObject* closure, PyObject* args, PyObject* kwargs) {
+         try {
+             TUnboundClosureHolder<TObject>* hld = TParent::CastToObjectHolder(closure);
+             if (!hld)
+                 ythrow yexception() << "Can't cast object to TypeHolder";
+
+             Py_ssize_t size = 0;
+             if (!args || !PyTuple_Check(args) || (size = PyTuple_Size(args)) < 1)
+                 ythrow yexception() << "Can't parse first argument: it should be valid object";
+             // PyTuple_GetItem returns a borrowed reference; the one-argument
+             // TPyObjectPtr constructor takes its own.
+             TPyObjectPtr obj(PyTuple_GetItem(args, 0));
+             // Everything after the first element, as a new tuple.
+             TPyObjectPtr newArgs(PyTuple_GetSlice(args, 1, size), true);
+             if (!newArgs.Get())
+                 // Error already set
+                 return nullptr;
+
+             return hld->Call(obj.Get(), newArgs.Get(), kwargs);
+         } catch (const std::exception& ex) {
+             PyErr_SetString(::NPyBind::TExceptionsHolder::Instance().ToPyException(ex).Get(), ex.what());
+         } catch (...) {
+             PyErr_SetString(PyExc_RuntimeError, "Unknown error occurred while trying to call method");
+         }
+         return nullptr;
+     }
+
+     static PyTypeObject* GetObject(TUnboundClosureHolder<TObject>& obj) {
+         return obj.GetObject();
+     }
+ };
+
+ // Payload of a bound-method Python object: the Python object the method was
+ // taken from, the wrapped C++ object, the method name and the caller table
+ // used to dispatch it.
+ template <typename TObject>
+ class TBoundClosureHolder {
+ private:
+     TPyObjectPtr Ptr;
+     TObject* Object;
+     TString Method;
+     const TMethodCallers<TObject>& MethodCallers;
+
+ public:
+     TBoundClosureHolder(PyObject* ptr, TObject* obj, const TString& meth, const TMethodCallers<TObject>& callers)
+         : Ptr(ptr)
+         , Object(obj)
+         , Method(meth)
+         , MethodCallers(callers)
+     {
+     }
+
+     TPyObjectPtr GetObjectPtr() const {
+         return Ptr;
+     }
+
+     TObject* GetObject() const {
+         return Object;
+     }
+
+     const TString GetMethod() const {
+         return Method;
+     }
+
+     // Dispatches the stored method through MethodCallers. A null result is
+     // passed through when a Python error is pending; without one it is
+     // turned into a yexception.
+     PyObject* Call(PyObject* args, PyObject* kwargs) const {
+         PyObject* const result = MethodCallers.CallMethod(Ptr.Get(), Object, args, kwargs, Method);
+         const bool silentFailure = (result == nullptr) && !PyErr_Occurred();
+         if (silentFailure) {
+             ythrow yexception() << "PyBind can't call method '" << Method << "'";
+         }
+         return result;
+     }
+ };
+
+ // Python type of bound-method objects handed out by TPythonType::GetObjectAttr:
+ // calling one dispatches the stored method on the stored object.
+ template <typename TObject>
+ class TBoundClosure: public TPythonType<TBoundClosureHolder<TObject>, TObject, TBoundClosure<TObject>> {
+ private:
+     typedef TPythonType<TBoundClosureHolder<TObject>, TObject, TBoundClosure<TObject>> TMyParent;
+     // Produces "<bound method Type.method of repr(obj)>" for both __repr__ and __str__.
+     class TReprMethodCaller: public TBaseMethodCaller<TObject> {
+     public:
+         bool CallMethod(PyObject* closure, TObject*, PyObject*, PyObject*, PyObject*& res) const override {
+             TBoundClosureHolder<TObject>* hld = TMyParent::CastToObjectHolder(closure);
+             // The cast yields nullptr for foreign objects and for instances
+             // without a holder; fail like Call() does instead of dereferencing it.
+             if (!hld)
+                 ythrow yexception() << "Can't cast object to ClosureHolder";
+             TPyObjectPtr obj(hld->GetObjectPtr());
+             TPyObjectPtr type(PyObject_Type(obj.Get()), true);
+
+             TString reprStr;
+             TPyObjectPtr repr(PyObject_Repr(obj.Get()), true);
+             if (!repr.Get() || !FromPyObject(repr.Get(), reprStr))
+                 ythrow yexception() << "Could not get repr of object";
+
+             TString nameStr;
+             TPyObjectPtr name(PyObject_GetAttrString(type.Get(), "__name__"), true);
+             if (!name.Get() || !FromPyObject(name.Get(), nameStr))
+                 ythrow yexception() << "Could not get name of object";
+
+             TString methodName(hld->GetMethod());
+
+             TString message = "<bound method " + nameStr + "." + methodName + " of " + reprStr + ">";
+             res = ReturnString(message);
+             return (res != nullptr);
+         }
+     };
+
+ private:
+     // tp_call handler: forwards to the holder. C++ exceptions are converted to
+     // Python errors and nullptr is returned.
+     static PyObject* Call(PyObject* closure, PyObject* args, PyObject* kwargs) {
+         try {
+             TBoundClosureHolder<TObject>* hld = TMyParent::CastToObjectHolder(closure);
+             if (!hld)
+                 ythrow yexception() << "Can't cast object to ClosureHolder";
+
+             return hld->Call(args, kwargs);
+         } catch (const std::exception& ex) {
+             PyErr_SetString(::NPyBind::TExceptionsHolder::Instance().ToPyException(ex).Get(), ex.what());
+         } catch (...) {
+             PyErr_SetString(PyExc_RuntimeError, "Unknown error occurred while trying to call method");
+         }
+         return nullptr;
+     }
+
+ public:
+     TBoundClosure()
+         : TMyParent("", "")
+     {
+         TMyParent::AddCaller("__repr__", new TReprMethodCaller());
+         TMyParent::AddCaller("__str__", new TReprMethodCaller());
+         TMyParent::SetClosure(&Call);
+     }
+
+     static TObject* GetObject(const TBoundClosureHolder<TObject>& closure) {
+         return closure.GetObject();
+     }
+ };
+
+ }
+
+ // Attribute lookup for a bound object.
+ // Order of resolution:
+ //   1. the registered attribute getters;
+ //   2. if they produced nothing and a method named `attr` exists, a new bound-closure object;
+ //   3. otherwise, when no Python error is pending, TPyErr(PyExc_AttributeError) is thrown.
+ // Every C++ exception is converted into a pending Python error and nullptr is returned.
+ template <typename TObjectHolder, typename TObject, typename TTraits>
+ PyObject* TPythonType<TObjectHolder, TObject, TTraits>::GetObjectAttr(PyObject* pyObj, char* attr) {
+ try {
+ TObject* obj = CastToObject(pyObj);
+ // A failed cast skips the getters; the method lookup below still receives the null obj.
+ PyObject* res = obj ? Instance().Attributes.GetAttrGetters().GetAttr(pyObj, *obj, attr) : nullptr;
+ if (res == nullptr && Instance().Attributes.GetMethodCallers().HasMethod(pyObj, obj, attr)) {
+ TPyObjectPtr r = NPrivate::TBoundClosure<TObject>::Instance().CreatePyObject(new NPrivate::TBoundClosureHolder<TObject>(pyObj, obj, attr, Instance().Attributes.GetMethodCallers()));
+ // NOTE(review): RefGet() presumably hands out the pointer with its own reference so that
+ // it outlives `r` - confirm against TPyObjectPtr.
+ res = r.RefGet();
+ }
+ if (res == nullptr && !PyErr_Occurred())
+ ythrow TPyErr(PyExc_AttributeError) << "PyBind can't get attribute '" << attr << "'";
+ return res;
+ } catch (const std::exception& ex) {
+ PyErr_SetString(TExceptionsHolder::Instance().ToPyException(ex).Get(), ex.what());
+ } catch (...) {
+ PyErr_SetString(PyExc_RuntimeError, (TString("Unknown error occurred while trying to get attribute '") + attr + "'").c_str());
+ }
+ return nullptr;
+ }
+
+ // Attribute assignment for a bound object.
+ // Returns 0 when a registered setter accepted the value and -1 otherwise. When nothing was
+ // set and no Python error is pending, a yexception is thrown and converted into a Python
+ // error by the handlers below.
+ template <typename TObjectHolder, typename TObject, typename TTraits>
+ int TPythonType<TObjectHolder, TObject, TTraits>::SetObjectAttr(PyObject* pyObj, char* attr, PyObject* value) {
+     try {
+         bool assigned = false;
+         if (TObject* target = CastToObject(pyObj)) {
+             assigned = Instance().Attributes.GetAttrSetters().SetAttr(pyObj, *target, attr, value);
+         }
+         if (assigned) {
+             return 0;
+         }
+         if (!PyErr_Occurred()) {
+             ythrow yexception() << "PyBind can't set attribute '" << attr << "'";
+         }
+         return -1;
+     } catch (const std::exception& error) {
+         PyErr_SetString(TExceptionsHolder::Instance().ToPyException(error).Get(), error.what());
+     } catch (...) {
+         PyErr_SetString(PyExc_RuntimeError, (TString("Unknown error occurred while trying to set attribute '") + attr + "'").c_str());
+     }
+     return -1;
+ }
+
+ // str() handler: invokes the registered "__str__" method caller.
+ // C++ exceptions become a pending Python error and nullptr is returned.
+ template <typename TObjectHolder, typename TObject, typename TTraits>
+ PyObject* TPythonType<TObjectHolder, TObject, TTraits>::GetStr(PyObject* obj) {
+     try {
+         TObject* const target = CastToObject(obj);
+         return Instance().Attributes.GetMethodCallers().CallMethod(obj, target, nullptr, nullptr, "__str__");
+     } catch (const std::exception& error) {
+         PyErr_SetString(TExceptionsHolder::Instance().ToPyException(error).Get(), error.what());
+     } catch (...) {
+         PyErr_SetString(PyExc_RuntimeError, "Unknown error occurred while trying to call '__str__'");
+     }
+     return nullptr;
+ }
+
+ // iter() handler: invokes the registered "__iter__" method caller.
+ // C++ exceptions become a pending Python error and nullptr is returned.
+ template <typename TObjectHolder, typename TObject, typename TTraits>
+ PyObject* TPythonType<TObjectHolder, TObject, TTraits>::GetIter(PyObject* obj) {
+     try {
+         TObject* const target = CastToObject(obj);
+         return Instance().Attributes.GetMethodCallers().CallMethod(obj, target, nullptr, nullptr, "__iter__");
+     } catch (const std::exception& error) {
+         PyErr_SetString(TExceptionsHolder::Instance().ToPyException(error).Get(), error.what());
+     } catch (...) {
+         PyErr_SetString(PyExc_RuntimeError, "Unknown error occurred while trying to call '__iter__'");
+     }
+     return nullptr;
+ }
+
+ // Iterator-advance handler: invokes the method caller registered under the name "next".
+ // C++ exceptions become a pending Python error and nullptr is returned.
+ template <typename TObjectHolder, typename TObject, typename TTraits>
+ PyObject* TPythonType<TObjectHolder, TObject, TTraits>::GetNext(PyObject* obj) {
+     try {
+         TObject* const target = CastToObject(obj);
+         return Instance().Attributes.GetMethodCallers().CallMethod(obj, target, nullptr, nullptr, "next");
+     } catch (const std::exception& error) {
+         PyErr_SetString(TExceptionsHolder::Instance().ToPyException(error).Get(), error.what());
+     } catch (...) {
+         PyErr_SetString(PyExc_RuntimeError, "Unknown error occurred while trying to call 'next'");
+     }
+     return nullptr;
+ }
+
+ // repr() handler: invokes the registered "__repr__" method caller.
+ // C++ exceptions become a pending Python error and nullptr is returned.
+ template <typename TObjectHolder, typename TObject, typename TTraits>
+ PyObject* TPythonType<TObjectHolder, TObject, TTraits>::GetRepr(PyObject* obj) {
+     try {
+         TObject* const target = CastToObject(obj);
+         return Instance().Attributes.GetMethodCallers().CallMethod(obj, target, nullptr, nullptr, "__repr__");
+     } catch (const std::exception& error) {
+         PyErr_SetString(TExceptionsHolder::Instance().ToPyException(error).Get(), error.what());
+     } catch (...) {
+         PyErr_SetString(PyExc_RuntimeError, "Unknown error occurred while trying to call '__repr__'");
+     }
+     return nullptr;
+ }
+}
diff --git a/library/cpp/pybind/v2.cpp b/library/cpp/pybind/v2.cpp
new file mode 100644
index 0000000000..edce0be719
--- /dev/null
+++ b/library/cpp/pybind/v2.cpp
@@ -0,0 +1,43 @@
+#include "v2.h"
+namespace NPyBind {
+ namespace Detail {
+ // Specialization for classes without a Python parent (parent type is void): no parent type.
+ template <>
+ PyTypeObject* GetParentType<void>(const TPyModuleDefinition&) {
+ return nullptr;
+ }
+
+
+ // Records `pythonType` under `name` in the module's class table.
+ // Nothing is recorded when InitEnabled is false.
+ template <bool InitEnabled>
+ void UpdateClassNamesInModule(TPyModuleDefinition& M, const TString& name, PyTypeObject* pythonType) {
+     if (InitEnabled) {
+         M.ClassName2Type[name] = pythonType;
+     }
+ }
+
+ // Records the context getter `base` under `name` in the module's getter table.
+ // Nothing is recorded when InitEnabled is false.
+ template <bool InitEnabled>
+ void UpdateGetContextInModule(TPyModuleDefinition& M, const TString& name, IGetContextBase* base) {
+     if (InitEnabled) {
+         M.Class2ContextGetter[name] = base;
+     }
+ }
+
+ // On Python 3 registers a finalization callback that drops every module definition
+ // held by the registry.
+ TPyModuleRegistry::TPyModuleRegistry() {
+#if PY_MAJOR_VERSION >= 3
+     NPrivate::AddFinalizationCallBack([this]() {
+         // CurrentModule points into UnnamedModule or Name2Def, which are destroyed below;
+         // reset it so that GetCurrentModule() builds a fresh stub instead of using a dangling pointer.
+         CurrentModule = nullptr;
+         if (UnnamedModule) {
+             UnnamedModule.Clear();
+         }
+         Name2Def.clear();
+     });
+#endif
+ }
+ // Explicit instantiations: v2.h only declares these templates, so both values of
+ // InitEnabled are emitted from this translation unit.
+ template void UpdateClassNamesInModule<false>(TPyModuleDefinition& M, const TString& name, PyTypeObject* pythonType);
+ template void UpdateClassNamesInModule<true>(TPyModuleDefinition& M, const TString& name, PyTypeObject* pythonType);
+
+
+ template void UpdateGetContextInModule<false>(TPyModuleDefinition& M, const TString& name, IGetContextBase* base);
+ template void UpdateGetContextInModule<true>(TPyModuleDefinition& M, const TString& name, IGetContextBase* base);
+ }//Detail
+}//NPyBind
diff --git a/library/cpp/pybind/v2.h b/library/cpp/pybind/v2.h
new file mode 100644
index 0000000000..f561d6a380
--- /dev/null
+++ b/library/cpp/pybind/v2.h
@@ -0,0 +1,514 @@
+#pragma once
+
+#include <library/cpp/pybind/method.h>
+#include <library/cpp/pybind/typedesc.h>
+#include <library/cpp/pybind/module.h>
+#include <util/generic/hash.h>
+#include <util/generic/hash_set.h>
+#include <util/generic/string.h>
+namespace NPyBind {
+// Defines two BuildPyObject overloads (rvalue and const reference) for TClass::TBase
+// that forward to the static TClass::BuildPyObject.
+#define DEFINE_CONVERTERS_IMPL(TClass) \
+ PyObject* BuildPyObject(typename TClass::TBase&& base) { \
+ return TClass::BuildPyObject(std::move(base)); \
+ } \
+ PyObject* BuildPyObject(const typename TClass::TBase& base) { \
+ return TClass::BuildPyObject(base); \
+ }
+
+// Same as DEFINE_CONVERTERS_IMPL, with the class taken from TFunctionResult<decltype(function)>.
+#define DEFINE_CONVERTERS(function) DEFINE_CONVERTERS_IMPL(TFunctionResult<decltype(function)>)
+
+// Specializes ::NPyBind::FromPyObject for pointers (mutable and const) to TClass::TBase.
+// The pointer is obtained with TClass::CastToObject; the result is false when the cast yields nullptr.
+#define DEFINE_TRANSFORMERS_IMPL(TClass) \
+ template <> \
+ bool ::NPyBind::FromPyObject<typename TClass::TBase*>(PyObject * obj, typename TClass::TBase * &res) { \
+ res = TClass::CastToObject(obj); \
+ return res != nullptr; \
+ } \
+ template <> \
+ bool ::NPyBind::FromPyObject<typename TClass::TBase const*>(PyObject * obj, typename TClass::TBase const*& res) { \
+ res = TClass::CastToObject(obj); \
+ return res != nullptr; \
+ }
+
+// Same as DEFINE_TRANSFORMERS_IMPL, with the class taken from TFunctionResult<decltype(function)>.
+#define DEFINE_TRANSFORMERS(function) DEFINE_TRANSFORMERS_IMPL(TFunctionResult<decltype(function)>)
+
+ namespace Detail {
+ // Type-erased base of IGetContext<T>; lets context getters of different classes
+ // be stored in one map (TPyModuleDefinition::Class2ContextGetter).
+ struct IGetContextBase {
+ virtual ~IGetContextBase() = default;
+ };
+ } //Detail
+ // Description of one Python module built with PyBind v2.
+ struct TPyModuleDefinition {
+ // Initializes the module called `name` and registers it in the module registry.
+ static void InitModule(const TString& name);
+ // Returns the registry's current module.
+ static TPyModuleDefinition& GetModule();
+
+ // Module name.
+ TString Name;
+ // Python module object.
+ NPyBind::TPyObjectPtr M;
+ // Class short name -> Python type object.
+ THashMap<TString, PyTypeObject*> ClassName2Type;
+ // Class short name -> context getter (non-owning pointers).
+ THashMap<TString, Detail::IGetContextBase*> Class2ContextGetter;
+ };
+
+ namespace Detail {
+ // Manages the lifecycle of module definitions.
+ // IMPORTANT: do not use it in a PyBind v1 environment, it leads to an inconsistent state of the v1 module.
+ // GetUnnamedModule: creates the unnamed module stub on first use and makes it the current module,
+ // so that functions can be added to it before the module has a name.
+ // InitModuleWithName: converts the unnamed module into a named one; it remains the current module
+ // and can later be selected with SwitchToModuleByName.
+ // SwitchToModuleByName: makes the given registered module the current one.
+ class TPyModuleRegistry {
+ private:
+ TPyModuleRegistry();
+ TPyModuleRegistry(const TPyModuleRegistry&) = delete;
+ TPyModuleRegistry& operator=(TPyModuleRegistry&) = delete;
+ public:
+ // Returns the single registry instance (function-local static).
+ static TPyModuleRegistry& Get() {
+ static TPyModuleRegistry registry;
+ return registry;
+ }
+ // Returns the current module; creates the unnamed stub when there is no current module yet.
+ TPyModuleDefinition& GetCurrentModule() {
+ if (!CurrentModule) {
+ GetUnnamedModule();
+ }
+ return *CurrentModule;
+ }
+
+ // Returns the unnamed module; creating it also makes it the current module.
+ TPyModuleDefinition& GetUnnamedModule() {
+ if (!UnnamedModule) {
+ UnnamedModule = TPyModuleDefinition();
+ CurrentModule = const_cast<TPyModuleDefinition*>(UnnamedModule.Get());
+ }
+ return *UnnamedModule;
+ }
+
+ // Copies the unnamed module into Name2Def under `name` (overwriting an existing entry),
+ // clears the unnamed module and makes the stored copy current.
+ TPyModuleDefinition& InitModuleWithName(const TString& name) {
+ if (!UnnamedModule) {
+ GetUnnamedModule();
+ }
+ Name2Def[name] = *UnnamedModule;
+ UnnamedModule.Clear();
+ CurrentModule = &Name2Def[name];
+ return *CurrentModule;
+ }
+
+ // Makes the registered module `name` current.
+ // Fails (Y_ENSURE) when the name is unknown or an unnamed module is still pending.
+ TPyModuleDefinition& SwitchToModuleByName(const TString& name) {
+ Y_ENSURE(Name2Def.contains(name));
+ Y_ENSURE(UnnamedModule.Empty());
+ CurrentModule = &Name2Def[name];
+ return *CurrentModule;
+ }
+ private:
+ // Non-owning pointer to either UnnamedModule or an entry of Name2Def.
+ TPyModuleDefinition* CurrentModule = nullptr;
+ // Module that has not been given a name yet.
+ TMaybe<TPyModuleDefinition> UnnamedModule;//
+ // Named modules.
+ THashMap<TString, TPyModuleDefinition> Name2Def;
+ };
+ }//Detail
+
+ // Replaces the registry's unnamed module with a definition made of `name`, the module object
+ // returned by TModuleHolder::Instance().InitModule(name) and empty class tables, then registers
+ // it under `name` (which also makes it the current module).
+ inline void TPyModuleDefinition::InitModule(const TString& name) {
+ Detail::TPyModuleRegistry::Get().GetUnnamedModule() = TPyModuleDefinition{name, TModuleHolder::Instance().InitModule(name), {}, {}};
+ Detail::TPyModuleRegistry::Get().InitModuleWithName(name);
+ }
+
+ // Returns the module that is current in the registry.
+ inline TPyModuleDefinition& TPyModuleDefinition::GetModule() {
+ return Detail::TPyModuleRegistry::Get().GetCurrentModule();
+ }
+
+ namespace Detail {
+ // Per-type storage of the short Python class name (one static instance per template argument).
+ // NOTE(review): despite the parameter name, the visible users (TPyClass constructor,
+ // GetParentType, ReloadAttrsFromBase) instantiate it with the wrapped C++ class.
+ template <class TPythonType>
+ struct TNameCtx {
+ TString ClassShortName;
+ static TNameCtx& GetNameCtx() {
+ static TNameCtx result;
+ return result;
+ }
+ };
+ // Everything collected about a class before its Python type is created:
+ // parent type, names, description and the lists of method callers, attribute getters and setters.
+ template <class TBase>
+ struct TContextImpl {
+ PyTypeObject* ParentType = nullptr;
+ TString ClassShortName;
+ TString ClassFullName;
+ TString ClassDescription;
+
+
+ // (name, handler) pairs in registration order.
+ TVector<std::pair<TString, typename TPythonTypeAttributes<TBase>::TCallerPtr>> ListCallers;
+ TVector<std::pair<TString, typename TPythonTypeAttributes<TBase>::TGetterPtr>> ListGetters;
+ TVector<std::pair<TString, typename TPythonTypeAttributes<TBase>::TSetterPtr>> ListSetters;
+ };
+
+ // Typed access to the context of a class wrapping TObject.
+ template <class TObject>
+ struct IGetContext: public IGetContextBase {
+ virtual ~IGetContext() = default;
+ virtual const TContextImpl<TObject>& GetContext() const = 0;
+ };
+
+ // Overload chosen when creation is disabled or TBaseClass is not default constructible:
+ // always throws.
+ template <typename THolderClass, typename TBaseClass, bool ShouldEnable, typename=std::enable_if_t<!ShouldEnable || !std::is_default_constructible_v<TBaseClass>>>
+ THolderClass* DoInitPureObject(const TVector<TString>&) {
+ ythrow yexception() << "Can't create this object in pure mode from python";
+ }
+
+ // Overload chosen when creation is enabled and TBaseClass is default constructible:
+ // returns a new holder around a default-constructed TBaseClass.
+ // The trailing `typename=void` keeps this template distinct from the overload above.
+ template <typename THolderClass, typename TBaseClass, bool ShouldEnable, typename=std::enable_if_t<ShouldEnable && std::is_default_constructible_v<TBaseClass>>, typename=void>
+ THolderClass* DoInitPureObject(const TVector<TString>&) {
+ return new THolderClass(MakeHolder<TBaseClass>());
+ }
+
+ // Looks up the Python type registered in module `m` under the short name recorded for T.
+ // Returns nullptr when no class with that name is registered.
+ template <typename T>
+ PyTypeObject* GetParentType(const TPyModuleDefinition& m) {
+     const TString& parentName = Detail::TNameCtx<T>::GetNameCtx().ClassShortName;
+     const auto found = m.ClassName2Type.find(parentName);
+     if (found == m.ClassName2Type.end()) {
+         return nullptr;
+     }
+     return found->second;
+ }
+
+ // Declared here, defined in v2.cpp: classes without a parent have no parent type.
+ template <>
+ PyTypeObject* GetParentType<void>(const TPyModuleDefinition&);
+
+ // Defined and explicitly instantiated in v2.cpp for both values of InitEnabled.
+ template <bool InitEnabled>
+ void UpdateClassNamesInModule(TPyModuleDefinition& M, const TString& name, PyTypeObject* pythonType);
+
+ // Defined and explicitly instantiated in v2.cpp for both values of InitEnabled.
+ template <bool InitEnabled>
+ void UpdateGetContextInModule(TPyModuleDefinition& M, const TString& name, IGetContextBase* base);
+ }
+
+
+ // Carries the parent class of a bound class; void means "no parent".
+ template <class TParentPyClass_=void>
+ struct TPyParentClassTraits {
+ using TParentPyClass = TParentPyClass_;
+ };
+
+ // Configuration for TPyClass: InitEnabled_ selects whether objects can be created from Python;
+ // construction goes through typed constructor arguments (RawInit is false).
+ template <bool InitEnabled_, class TParentPyClass_=void>
+ struct TPyClassConfigTraits: public TPyParentClassTraits<TParentPyClass_> {
+ constexpr static bool InitEnabled = InitEnabled_;
+ constexpr static bool RawInit = false;
+ };
+
+ // Configuration for TPyClass with raw initialization: creation from Python is enabled and
+ // the wrapped class is constructed directly from (PyObject* args, PyObject* kwargs).
+ template <class TParentPyClass_=void>
+ struct TPyClassRawInitConfigTraits: public TPyParentClassTraits<TParentPyClass_> {
+ constexpr static bool InitEnabled = true;
+ constexpr static bool RawInit = true;
+ };
+
+
+ template <typename TBaseClass, typename TPyClassConfigTraits, typename... ConstructorArgs>
+ class TPyClass {
+ public:
+ using TBase = TBaseClass;
+ private:
+ using TThisClass = TPyClass<TBaseClass, TPyClassConfigTraits, ConstructorArgs...>;
+ using TContext = Detail::TContextImpl<TBase>;
+ // Owns the wrapped C++ object. Built either from an existing ::THolder or by moving
+ // a TBase value into a newly allocated one.
+ struct THolder {
+ ::THolder<TBase> Holder;
+ THolder(::THolder<TBase>&& right)
+ : Holder(std::move(right))
+ {
+ }
+ THolder(TBase&& right)
+ : Holder(MakeHolder<TBase>(std::move(right)))
+ {
+ }
+ };
+
+ // Type descriptor that binds TBase (stored in a THolder) to a Python type.
+ // Names, description, parent type and all attribute tables come from the context of the
+ // enclosing TPyClass at the moment this object is constructed.
+ class TSelectedTraits: public NPyBind::TPythonType<THolder, TBase, TSelectedTraits> {
+ private:
+     using TParent = NPyBind::TPythonType<THolder, TBase, TSelectedTraits>;
+     friend TParent;
+
+ public:
+     // Copies every registered caller, getter and setter from the class context into the type.
+     TSelectedTraits()
+         : TParent(TThisClass::GetContext().ClassFullName.data(), TThisClass::GetContext().ClassDescription.data(), TThisClass::GetContext().ParentType)
+     {
+         for (const auto& caller : TThisClass::GetContext().ListCallers) {
+             TParent::AddCaller(caller.first, caller.second);
+         }
+
+         for (const auto& getter : TThisClass::GetContext().ListGetters) {
+             TParent::AddGetter(getter.first, getter.second);
+         }
+
+         for (const auto& setter : TThisClass::GetContext().ListSetters) {
+             TParent::AddSetter(setter.first, setter.second);
+         }
+     }
+
+     // Returns the wrapped object owned by `holder`.
+     static TBase* GetObject(const THolder& holder) {
+         return holder.Holder.Get();
+     }
+
+     // Creates the wrapped object for a constructor call coming from Python.
+     //  - InitEnabled == false: always throws.
+     //  - RawInit: TBase is constructed directly from (args, kwargs).
+     //  - otherwise: `args` must be a tuple with exactly sizeof...(ConstructorArgs) items,
+     //    which are converted with GetArguments and passed to the TBase constructor.
+     // Throws yexception when the argument check fails.
+     static THolder* DoInitObject(PyObject* args, PyObject* kwargs) {
+         if constexpr (TPyClassConfigTraits::InitEnabled) {
+             if constexpr (TPyClassConfigTraits::RawInit) {
+                 static_assert(sizeof...(ConstructorArgs) == 0, "Do not pass construction args if use RawInit.");
+                 return new THolder(::MakeHolder<TBase>(args, kwargs));
+             } else {
+                 constexpr size_t expectedArgs = sizeof...(ConstructorArgs);
+                 if (args) {
+                     // PyTuple_Size must only be called on a tuple: on anything else it sets
+                     // SystemError and returns -1, so the two failure cases are reported separately.
+                     if (!PyTuple_Check(args)) {
+                         ythrow yexception() << "Method takes " << expectedArgs << " arguments, but they were not passed as a tuple";
+                     }
+                     const Py_ssize_t providedArgs = PyTuple_Size(args);
+                     if (providedArgs < 0 || static_cast<size_t>(providedArgs) != expectedArgs) {
+                         ythrow yexception() << "Method takes " << expectedArgs << " arguments, " << providedArgs << " provided";
+                     }
+                 }
+                 ::THolder<TBaseClass> basePtr{Apply([](auto&&... unpackedArgs) {return new TBase(std::forward<decltype(unpackedArgs)>(unpackedArgs)...); }, GetArguments<ConstructorArgs...>(args))};
+                 return new THolder(std::move(basePtr));
+             }
+         } else {
+             ythrow yexception() << "Can't create this object from python";
+         }
+     }
+
+     // Creates the object in "pure" mode; see Detail::DoInitPureObject for the two possible outcomes.
+     static THolder* DoInitPureObject(const TVector<TString>& properties) {
+         return Detail::DoInitPureObject<THolder, TBase, TPyClassConfigTraits::InitEnabled>(properties);
+     }
+
+     // Re-exports the parent's cast so that TPyClass can call it.
+     static TBase* CastToObject(PyObject* obj) {
+         return TParent::CastToObject(obj);
+     }
+
+     // Returns the Python type object of this binding.
+     static PyTypeObject* GetType() {
+         return TParent::GetPyTypePtr();
+     }
+ };
+
+ // Holds the single context of this TPyClass instantiation (function-local static) and
+ // exposes it through IGetContext so that derived classes can read it.
+ class TContextHolder: public Detail::IGetContext<TBaseClass> {
+ public:
+ static TContextHolder& GetContextHolder() {
+ static TContextHolder holder;
+ return holder;
+ }
+
+ // Mutable access, used while the class is being described.
+ TContext& GetContext() {
+ return Context;
+ }
+ // Read-only access through the IGetContext interface.
+ const TContext& GetContext() const override {
+ return Context;
+ }
+ private:
+ TContext Context;
+ };
+
+ // Adapts a method caller written for TSuperClass so it can be registered on TDerivedClass:
+ // `self` is converted to TSuperClass* with static_cast before the call is forwarded.
+ template <class TDerivedClass, class TSuperClass>
+ class TCallerWrapper: public TBaseMethodCaller<TDerivedClass> {
+ public:
+ // baseCaller must be non-null (checked with Y_ENSURE).
+ explicit TCallerWrapper(TSimpleSharedPtr<const TBaseMethodCaller<TSuperClass>> baseCaller)
+ : BaseCaller(baseCaller) {
+ Y_ENSURE(BaseCaller);
+ }
+
+ bool CallMethod(PyObject* owner, TDerivedClass* self, PyObject* args, PyObject* kwargs, PyObject*& res) const override {
+ return BaseCaller->CallMethod(owner, static_cast<TSuperClass*>(self), args, kwargs, res);
+ }
+
+ private:
+ TSimpleSharedPtr<const TBaseMethodCaller<TSuperClass>> BaseCaller;
+ };
+
+ // Adapts an attribute setter written for TSuperClass so it can be registered on TDerivedClass:
+ // `self` is converted to TSuperClass& with static_cast before the call is forwarded.
+ template <class TDerivedClass, class TSuperClass>
+ class TSetterWrapper: public TBaseAttrSetter<TDerivedClass> {
+ public:
+ // baseSetter must be non-null (checked with Y_ENSURE).
+ explicit TSetterWrapper(TSimpleSharedPtr<TBaseAttrSetter<TSuperClass>> baseSetter)
+ : BaseSetter(baseSetter) {
+ Y_ENSURE(BaseSetter);
+ }
+
+ bool SetAttr(PyObject* owner, TDerivedClass& self, const TString& attr, PyObject* val) override {
+ return BaseSetter->SetAttr(owner, static_cast<TSuperClass&>(self), attr, val);
+ }
+
+ private:
+ TSimpleSharedPtr<TBaseAttrSetter<TSuperClass>> BaseSetter;
+ };
+
+ // Adapts an attribute getter written for TSuperClass so it can be registered on TDerivedClass:
+ // `self` is converted to const TSuperClass& with static_cast before the call is forwarded.
+ template <class TDerivedClass, class TSuperClass>
+ class TGetterWrapper: public TBaseAttrGetter<TDerivedClass> {
+ public:
+ // baseGetter must be non-null (checked with Y_ENSURE).
+ explicit TGetterWrapper(TSimpleSharedPtr<const TBaseAttrGetter<TSuperClass>> baseGetter)
+ : BaseGetter(baseGetter) {
+ Y_ENSURE(BaseGetter);
+ }
+
+ bool GetAttr(PyObject* owner, const TDerivedClass& self, const TString& attr, PyObject*& res) const override {
+ return BaseGetter->GetAttr(owner, static_cast<const TSuperClass&>(self), attr, res);
+ }
+
+ private:
+ TSimpleSharedPtr<const TBaseAttrGetter<TSuperClass>> BaseGetter;
+ };
+
+ // Inherits callers, getters and setters from the parent class TSuperClass.
+ // The parent's context is found in the module by the short name recorded for TSuperClass;
+ // nothing happens when it is not registered or is not an IGetContext<TSuperClass>.
+ // An entry of the parent is skipped when this class already had an entry with the same name
+ // before the corresponding loop started, so definitions of this class take precedence.
+ template <class TSuperClass, typename=std::enable_if_t<!std::is_same_v<TSuperClass, void>>>
+ void ReloadAttrsFromBase() {
+ auto shortName = Detail::TNameCtx<TSuperClass>::GetNameCtx().ClassShortName;
+ if (!M.Class2ContextGetter.count(shortName)) {
+ return;
+ }
+ auto callerBasePtr = M.Class2ContextGetter[shortName];
+ if (auto getContextPtr = dynamic_cast<const Detail::IGetContext<TSuperClass>*>(callerBasePtr)) {
+ auto& ctx = getContextPtr->GetContext();
+ // Collects the names (first element of each pair) of a caller/getter/setter list.
+ auto getUniqueNames = [](const auto& collection) {
+ THashSet<TString> uniqueNames;
+ for (const auto& elem : collection) {
+ uniqueNames.insert(elem.first);
+ }
+ return uniqueNames;
+ };
+
+ // Methods.
+ auto uniqueCallerNames = getUniqueNames(GetContext().ListCallers);
+ using TConcreteCallerWrapper = TCallerWrapper<TBaseClass, TSuperClass>;
+ for (const auto& caller : ctx.ListCallers) {
+ if (uniqueCallerNames.contains(caller.first)) {
+ continue;
+ }
+ GetContext().ListCallers.push_back(std::make_pair(caller.first, MakeSimpleShared<TConcreteCallerWrapper>(caller.second)));
+ }
+
+ // Attribute getters.
+ auto uniqueGettersNames = getUniqueNames(GetContext().ListGetters);
+ using TConcreteGetterWrapper = TGetterWrapper<TBaseClass, TSuperClass>;
+ for (const auto& getter : ctx.ListGetters) {
+ if (uniqueGettersNames.contains(getter.first)) {
+ continue;
+ }
+ GetContext().ListGetters.push_back(std::make_pair(getter.first, MakeSimpleShared<TConcreteGetterWrapper>(getter.second)));
+ }
+
+ // Attribute setters.
+ auto uniqueSetterNames = getUniqueNames(GetContext().ListSetters);
+ using TConcreteSetterWrapper = TSetterWrapper<TBaseClass, TSuperClass>;
+ for (auto& setter : ctx.ListSetters) {
+ if (uniqueSetterNames.contains(setter.first)) {
+ continue;
+ }
+ GetContext().ListSetters.push_back(std::make_pair(setter.first, MakeSimpleShared<TConcreteSetterWrapper>(setter.second)));
+ }
+ }
+ }
+
+ // Overload for classes without a parent (TSuperClass is void): nothing to inherit.
+ template <class TSuperClass, typename=std::enable_if_t<std::is_same_v<TSuperClass, void>>, typename=void>
+ void ReloadAttrsFromBase() {
+ }
+
+ // Pulls in the attributes of the parent class, then registers the type in the Python module
+ // under the class short name.
+ void CompleteImpl() {
+ ReloadAttrsFromBase<typename TPyClassConfigTraits::TParentPyClass>();
+ TSelectedTraits::Instance().Register(M.M, GetContext().ClassShortName);
+ }
+
+ // Returns the mutable context shared by all objects of this TPyClass instantiation.
+ static TContext& GetContext() {
+ return TContextHolder::GetContextHolder().GetContext();
+ }
+
+
+ friend struct Detail::TContextImpl<TBase>;//instead of context
+ friend struct THolder;
+ friend class TSelectedTraits;
+
+ // Signature of a "raw" method handler: (owner, self, args, kwargs, result) -> success flag.
+ using TCallerFunc = std::function<bool(PyObject*, TBaseClass*, PyObject*, PyObject*, PyObject*&)>;
+ // Method caller that forwards every call to a stored TCallerFunc.
+ class TFuncCallerWrapper: public TBaseMethodCaller<TBaseClass> {
+ public:
+ // func must be non-empty (checked with Y_ENSURE).
+ explicit TFuncCallerWrapper(TCallerFunc func)
+ : Func(func) {
+ Y_ENSURE(func);
+ }
+
+ bool CallMethod(PyObject* owner, TBaseClass* self, PyObject* args, PyObject* kwargs, PyObject*& res) const override {
+ return Func(owner, self, args, kwargs, res);
+ }
+ private:
+ mutable TCallerFunc Func;
+ };
+ public:
+ // Starts the description of a Python class `name` in the current module.
+ // name  - short class name; the full name is "<module name>.<name>".
+ // descr - class description.
+ // Records the type and the context getter in the module (only when InitEnabled, see
+ // Detail::Update*InModule), fills the class context and remembers the short name for TBaseClass.
+ TPyClass(const TString& name, const TString& descr = "")
+ : M(TPyModuleDefinition::GetModule())
+ {
+ Detail::UpdateClassNamesInModule<TPyClassConfigTraits::InitEnabled>(M, name, TSelectedTraits::GetType());
+ Detail::UpdateGetContextInModule<TPyClassConfigTraits::InitEnabled>(M, name, &TContextHolder::GetContextHolder());
+
+ GetContext().ClassFullName = TString::Join(M.Name, ".", name);
+ GetContext().ClassShortName = name;
+ GetContext().ClassDescription = descr;
+ // The parent type is resolved by the short name recorded for the parent class.
+ GetContext().ParentType = Detail::GetParentType<typename TPyClassConfigTraits::TParentPyClass>(M);
+ Detail::TNameCtx<TBaseClass>::GetNameCtx().ClassShortName = name;
+ }
+
+ // Registers a non-const member function as Python method `name`. Returns *this for chaining.
+ template <typename TMemberFuction, typename = std::enable_if_t<std::is_member_function_pointer_v<TMemberFuction>>, typename=std::enable_if_t<!TIsPointerToConstMemberFunction<TMemberFuction>::value>>
+ TThisClass& Def(const TString& name, TMemberFuction t) {
+ GetContext().ListCallers.push_back(std::make_pair(name, CreateMethodCaller<TBase>(t)));
+ return *this;
+ }
+
+ // Registers a const member function as Python method `name`. Returns *this for chaining.
+ // The trailing `typename=void` keeps this template distinct from the non-const overload.
+ template <typename TMemberFuction, typename = std::enable_if_t<std::is_member_function_pointer_v<TMemberFuction>>, typename=std::enable_if_t<TIsPointerToConstMemberFunction<TMemberFuction>::value>, typename=void>
+ TThisClass& Def(const TString& name, TMemberFuction t) {
+ GetContext().ListCallers.push_back(std::make_pair(name, CreateConstMethodCaller<TBase>(t)));
+ return *this;
+ }
+
+ // Registers a data member as Python attribute `name` with both a getter and a setter.
+ // Returns *this for chaining.
+ template <typename TMemberObject, typename = std::enable_if_t<std::is_member_object_pointer_v<TMemberObject>>>
+ TThisClass& Def(const TString& name, TMemberObject t) {
+ GetContext().ListGetters.push_back(std::make_pair(name, CreateAttrGetter<TBase>(t)));
+ GetContext().ListSetters.push_back(std::make_pair(name, CreateAttrSetter<TBase>(t)));
+ return *this;
+ }
+
+ // Registers a functor taking the object by reference as Python method `name`.
+ // Returns *this for chaining.
+ template <typename TResultType, typename... Args>
+ TThisClass& DefByFunc(const TString& name, std::function<TResultType(TBaseClass&, Args...)> func) {
+ GetContext().ListCallers.push_back(std::make_pair(name, CreateFunctorCaller<TBase, TResultType, Args...>(func)));
+ return *this;
+ }
+
+ // Registers a raw handler (see TCallerFunc) as Python method `name`; the handler receives
+ // the Python arguments unconverted. Returns *this for chaining.
+ TThisClass& DefByFunc(const TString& name, TCallerFunc origFunc) {
+ GetContext().ListCallers.push_back(std::make_pair(name, MakeSimpleShared<TFuncCallerWrapper>(origFunc)));
+ return *this;
+ }
+
+ // Registers a data member as Python attribute `name` with a getter only.
+ // Returns *this for chaining.
+ template <typename TMemberObject>
+ TThisClass& DefReadonly(const TString& name, TMemberObject t, std::enable_if_t<std::is_member_object_pointer<TMemberObject>::value>* = nullptr) {
+ GetContext().ListGetters.push_back(std::make_pair(name, CreateAttrGetter<TBase>(t)));
+ return *this;
+ }
+
+
+ // Exposes a pair of member functions as Python attribute `name` (getter and setter).
+ // Returns *this for chaining.
+ template <typename TMethodGetter, typename TMethodSetter, typename=std::enable_if_t<std::is_member_function_pointer_v<TMethodGetter> && std::is_member_function_pointer_v<TMethodSetter>>>
+ TThisClass& AsProperty(const TString& name, TMethodGetter getter, TMethodSetter setter) {
+ GetContext().ListGetters.push_back(std::make_pair(name, CreateMethodAttrGetter<TBase>(getter)));
+ GetContext().ListSetters.push_back(std::make_pair(name, CreateMethodAttrSetter<TBase>(setter)));
+ return *this;
+ }
+
+ // Exposes a pair of functors (anything that is not a member function pointer) as Python
+ // attribute `name` (getter and setter). Returns *this for chaining.
+ template <typename TMethodGetter, typename TMethodSetter, typename=std::enable_if_t<!std::is_member_function_pointer_v<TMethodGetter> && !std::is_member_function_pointer_v<TMethodSetter>>>
+ TThisClass& AsPropertyByFunc(const TString& name, TMethodGetter getter, TMethodSetter setter) {
+ GetContext().ListGetters.push_back(std::make_pair(name, CreateFunctorAttrGetter<TBase>(getter)));
+ GetContext().ListSetters.push_back(std::make_pair(name, CreateFunctorAttrSetter<TBase>(setter)));
+ return *this;
+ }
+
+ // Exposes a member function as read-only Python attribute `name`. Returns *this for chaining.
+ template <typename TMethodGetter, typename=std::enable_if_t<std::is_member_function_pointer_v<TMethodGetter>>>
+ TThisClass& AsProperty(const TString& name, TMethodGetter getter) {
+ GetContext().ListGetters.push_back(std::make_pair(name, CreateMethodAttrGetter<TBase>(getter)));
+ return *this;
+ }
+
+ // Exposes a functor as read-only Python attribute `name`. Returns *this for chaining.
+ template <typename TMethodGetter>
+ TThisClass& AsPropertyByFunc(const TString& name, TMethodGetter getter) {
+ GetContext().ListGetters.push_back(std::make_pair(name, CreateFunctorAttrGetter<TBase>(getter)));
+ return *this;
+ }
+
+ // Finishes the class description and registers the type in the module.
+ // Only the first call on this object does the work; later calls just return *this.
+ TThisClass& Complete() {
+     if (Completed) {
+         return *this;
+     }
+     CompleteImpl();
+     Completed = true;
+     return *this;
+ }
+
+ public:
+ // Creates a Python object that takes over `base` (moved into a new holder).
+ static PyObject* BuildPyObject(TBase&& base) {
+ return NPyBind::BuildPyObject(TSelectedTraits::Instance().CreatePyObject(new THolder(std::move(base))));
+ }
+
+ // Creates a Python object holding a copy of `base`; the original object is left untouched.
+ static PyObject* BuildPyObject(const TBase& base) {
+ return NPyBind::BuildPyObject(TSelectedTraits::Instance().CreatePyObject(new THolder(TBase(base)))); // WARN - copy
+ }
+
+ // Returns the wrapped C++ object behind `obj` (delegates to TSelectedTraits::CastToObject).
+ static TBase* CastToObject(PyObject* obj) {
+ return TSelectedTraits::CastToObject(obj);
+ }
+
+ private:
+ TPyModuleDefinition& M;
+ bool Completed = false;
+ };
+
+ // Registers the free function `function` as module-level method `name` with description `descr`.
+ template <typename TFunctionSignature, TFunctionSignature function>
+ void DefImpl(const TString& name, const TString& descr = "") {
+ NPyBind::TModuleHolder::Instance().AddModuleMethod<TModuleMethodCaller<TFunctionSignature, function>::Call>(name, descr);
+ }
+
+// Shortcuts for DefImpl that deduce the function type with decltype.
+// DefFunc registers FUNC under NAME; DefFuncDescr additionally passes the description DESCR.
+#define DefFunc(NAME, FUNC) NPyBind::DefImpl<decltype(FUNC), FUNC>(NAME)
+#define DefFuncDescr(NAME, FUNC, DESCR) NPyBind::DefImpl<decltype(FUNC), FUNC>(NAME, DESCR)
+};
diff --git a/library/cpp/pybind/ya.make b/library/cpp/pybind/ya.make
new file mode 100644
index 0000000000..9b7b3413f2
--- /dev/null
+++ b/library/cpp/pybind/ya.make
@@ -0,0 +1,14 @@
+PY23_NATIVE_LIBRARY()
+
+SRCS(
+ cast.cpp
+ pod.cpp
+ typedesc.cpp
+ module.cpp
+ exceptions.cpp
+ embedding.cpp
+ empty.cpp
+ v2.cpp
+)
+
+END()
diff --git a/library/cpp/remmap/remmap.cpp b/library/cpp/remmap/remmap.cpp
new file mode 100644
index 0000000000..ce72af7352
--- /dev/null
+++ b/library/cpp/remmap/remmap.cpp
@@ -0,0 +1,138 @@
+#include <util/system/info.h>
+#include <util/system/defaults.h>
+
+#if defined(_win_)
+#include <util/system/winint.h>
+#elif defined(_unix_)
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#ifndef MAP_NOCORE
+#define MAP_NOCORE 0
+#endif
+#else
+#error todo
+#endif
+
+#include "remmap.h"
+
+// System page size, queried once during static initialization; sizes are rounded up to it.
+static const size_t REMMAP_PAGESIZE = NSystemInfo::GetPageSize();
+
+#if defined(_unix_)
+// Creates an empty allocation (no memory mapped).
+TRemmapAllocation::TRemmapAllocation()
+ : Ptr_(nullptr)
+ , Size_(0)
+{
+}
+
+// Creates an allocation of `size` bytes; `base` is passed to Alloc as the address hint.
+// The result of Alloc is not checked here: on failure the object stays empty.
+TRemmapAllocation::TRemmapAllocation(size_t size, char* base)
+ : Ptr_(nullptr)
+ , Size_(0)
+{
+ Alloc(size, base);
+}
+
+// Maps `size` bytes of anonymous memory. Must only be called on an empty allocation.
+// size - requested size in bytes; 0 maps nothing and returns nullptr.
+// base - address hint for the mapping.
+// Returns the start of the region, or nullptr when mmap failed.
+char* TRemmapAllocation::Alloc(size_t size, char* base) {
+ assert(Ptr_ == nullptr);
+
+ if (!size)
+ return nullptr;
+
+ // Probe: map a 16 GiB region at the hint and release it right away; its address becomes
+ // the hint for the real mapping below.
+ // NOTE(review): presumably this finds a spot with free address space behind it so that
+ // Realloc can grow the region in place - confirm.
+ const size_t HUGESIZE = size_t(16) << 30;
+ Ptr_ = CommonMMap(HUGESIZE, base);
+
+ if (Ptr_ != (char*)MAP_FAILED)
+ munmap((void*)Ptr_, HUGESIZE);
+ else
+ Ptr_ = nullptr;
+
+ // Real mapping, rounded up to whole pages.
+ Ptr_ = CommonMMap(AlignUp(size, REMMAP_PAGESIZE), Ptr_);
+ if (Ptr_ == (char*)MAP_FAILED)
+ Ptr_ = nullptr;
+
+ // Size_ keeps the requested size, not the page-aligned one.
+ Size_ = Ptr_ ? size : 0;
+ return Ptr_;
+}
+
+// Resizes the allocation to `newsize` bytes, in place when possible.
+// Returns the (possibly moved) start of the region; nullptr when newsize is 0.
+char* TRemmapAllocation::Realloc(size_t newsize) {
+ if (Ptr_ == nullptr)
+ return Alloc(newsize);
+
+ size_t realSize = AlignUp(Size_, REMMAP_PAGESIZE);
+ size_t needSize = AlignUp(newsize, REMMAP_PAGESIZE);
+
+ if (needSize > realSize) {
+ // Grow: ask for the missing pages right behind the current region. The address is only
+ // a hint (CommonMMap does not use MAP_FIXED), so the result has to be checked.
+ char* part = Ptr_ + realSize;
+ char* bunch = CommonMMap(needSize - realSize, part);
+ // Mapped somewhere else: give those pages back.
+ if (bunch != (char*)MAP_FAILED && bunch != part)
+ munmap(bunch, needSize - realSize);
+ // In-place growth failed: move the data into a new region.
+ if (bunch == (char*)MAP_FAILED || bunch != part)
+ return FullRealloc(newsize);
+ } else if (needSize < realSize)
+ // Shrink: release the pages that are no longer needed.
+ munmap(Ptr_ + needSize, realSize - needSize);
+
+ // With newsize == 0 every page has been unmapped above.
+ if ((Size_ = newsize) == 0)
+ Ptr_ = nullptr;
+
+ return Ptr_;
+}
+
+// Unmaps the region, if there is one, and leaves the allocation empty.
+void TRemmapAllocation::Dealloc() {
+    if (Ptr_) {
+        munmap(Ptr_, AlignUp(Size_, REMMAP_PAGESIZE));
+        Ptr_ = nullptr;
+    }
+    Size_ = 0;
+}
+
+// Moves the allocation into a newly mapped region of `newsize` bytes, copying
+// Min(old size, newsize) bytes. The old region is released page by page while copying.
+// newsize == 0 releases the allocation and returns nullptr, like Realloc(0).
+// Aborts the process when the new region cannot be mapped.
+// Returns the start of the new region.
+char* TRemmapAllocation::FullRealloc(size_t newsize) {
+    if (newsize == 0) {
+        // mmap rejects zero-length mappings; an empty request simply frees everything.
+        Dealloc();
+        return nullptr;
+    }
+
+    char* newPtr = CommonMMap(newsize);
+    Y_ABORT_UNLESS(newPtr != MAP_FAILED, "mmap failed");
+
+    // Nothing to copy or unmap on an empty allocation; this also avoids
+    // pointer arithmetic, memcpy and munmap on nullptr.
+    if (Ptr_ != nullptr) {
+        size_t useful = Min(Size_, newsize), cur = 0;
+
+        // Copy whole pages and unmap each old page right after it has been copied.
+        for (; cur + REMMAP_PAGESIZE < useful; cur += REMMAP_PAGESIZE) {
+            memcpy((void*)&newPtr[cur], (void*)&Ptr_[cur], REMMAP_PAGESIZE);
+            munmap((void*)&Ptr_[cur], REMMAP_PAGESIZE);
+        }
+
+        // Copy the tail (at most one page), then release whatever is left of the old region.
+        memcpy((void*)&newPtr[cur], (void*)&Ptr_[cur], useful - cur);
+        munmap((void*)&Ptr_[cur], AlignUp(Size_ - cur, REMMAP_PAGESIZE));
+    }
+
+    Size_ = newsize;
+    return (Ptr_ = newPtr);
+}
+
+// Maps `size` bytes of private anonymous read/write memory; `base` is an address hint
+// (MAP_FIXED is not used). Returns the mmap result, i.e. (char*)MAP_FAILED on failure.
+inline char* TRemmapAllocation::CommonMMap(size_t size, char* base) {
+ return (char*)mmap((void*)base, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+}
+
+#else
+// Non-unix build: creates an empty allocation backed by TMappedAllocation.
+TRemmapAllocation::TRemmapAllocation()
+ : Allocation_(0, false, NULL)
+{
+}
+
+// Non-unix build: creates an allocation of `size` bytes, passing `base` to TMappedAllocation.
+TRemmapAllocation::TRemmapAllocation(size_t size, char* base)
+ : Allocation_(size, false, (void*)base)
+{
+}
+
+// Non-unix build: delegates to TMappedAllocation::Alloc.
+char* TRemmapAllocation::Alloc(size_t size, char* base) {
+ return (char*)Allocation_.Alloc(size, (void*)base);
+}
+
+// Non-unix build: there is no in-place resize, every Realloc is a full reallocation.
+char* TRemmapAllocation::Realloc(size_t newsize) {
+ return FullRealloc(newsize);
+}
+
+// Non-unix build: delegates to TMappedAllocation::Dealloc.
+void TRemmapAllocation::Dealloc() {
+ Allocation_.Dealloc();
+}
+
+// Non-unix build: allocates a new mapping of `newsize` bytes, copies as many bytes as fit
+// into both mappings, and swaps it in. The old mapping ends up in `other`, a local object.
+char* TRemmapAllocation::FullRealloc(size_t newsize) {
+ TMappedAllocation other(newsize);
+ memcpy(other.Ptr(), Allocation_.Ptr(), Min(other.MappedSize(), Allocation_.MappedSize()));
+ Allocation_.swap(other);
+ return Data();
+}
+#endif
diff --git a/library/cpp/remmap/remmap.h b/library/cpp/remmap/remmap.h
new file mode 100644
index 0000000000..7cb738f7ae
--- /dev/null
+++ b/library/cpp/remmap/remmap.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include <util/system/yassert.h>
+#include <util/system/align.h>
+#include <util/system/info.h>
+#include <util/system/filemap.h>
+#include <util/memory/alloc.h>
+#include <util/generic/noncopyable.h>
+
+// Resizable region of anonymous memory.
+// On unix the region is mapped directly with mmap and Realloc tries to grow or shrink it
+// in place; on other platforms the class wraps TMappedAllocation and every resize moves the data.
+// The destructor releases the region.
+class TRemmapAllocation : TNonCopyable {
+public:
+    // Creates an empty allocation.
+    TRemmapAllocation();
+    // Creates an allocation of `size` bytes; `base` is an address hint.
+    TRemmapAllocation(size_t size, char* base = nullptr);
+
+    ~TRemmapAllocation() {
+        Dealloc();
+    }
+
+    // Maps `size` bytes; `base` is an address hint. Returns the start of the region.
+    char* Alloc(size_t size, char* base = nullptr);
+    // Resizes the region to `newsize` bytes; the returned start address may differ from the old one.
+    char* Realloc(size_t newsize);
+    // Releases the region.
+    void Dealloc();
+    // Resizes by moving the data into a newly mapped region.
+    char* FullRealloc(size_t newsize);
+
+#if defined(_unix_)
+private:
+    inline char* CommonMMap(size_t size, char* base = nullptr);
+
+    char* Ptr_;
+    size_t Size_;
+
+public:
+    // Start of the region, nullptr when empty.
+    inline void* Ptr() const {
+        return (void*)Ptr_;
+    }
+    // Address of the byte at offset `pos`. The offset is a size_t so that positions of
+    // 4 GiB and above are not truncated.
+    inline char* Data(size_t pos = 0) const {
+        return Ptr_ + pos;
+    }
+    // Size in bytes as last requested (not rounded up to pages).
+    inline size_t Size() const {
+        return Size_;
+    }
+    inline void swap(TRemmapAllocation& other) {
+        DoSwap(Ptr_, other.Ptr_);
+        DoSwap(Size_, other.Size_);
+    }
+
+#else
+private:
+    TMappedAllocation Allocation_;
+
+public:
+    // Start of the region.
+    inline void* Ptr() const {
+        return Allocation_.Ptr();
+    }
+    // Address of the byte at offset `pos` (the offset type is dictated by TMappedAllocation::Data).
+    inline char* Data(ui32 pos = 0) const {
+        return Allocation_.Data(pos);
+    }
+    // Mapped size as reported by TMappedAllocation.
+    inline size_t Size() const {
+        return Allocation_.MappedSize();
+    }
+    inline void swap(TRemmapAllocation& other) {
+        Allocation_.swap(other.Allocation_);
+    }
+#endif
+};
diff --git a/library/cpp/remmap/ya.make b/library/cpp/remmap/ya.make
new file mode 100644
index 0000000000..281df6443a
--- /dev/null
+++ b/library/cpp/remmap/ya.make
@@ -0,0 +1,7 @@
+LIBRARY()
+
+SRCS(
+ remmap.cpp
+)
+
+END()
diff --git a/library/cpp/sqlite3/sqlite.cpp b/library/cpp/sqlite3/sqlite.cpp
new file mode 100644
index 0000000000..98e498f76b
--- /dev/null
+++ b/library/cpp/sqlite3/sqlite.cpp
@@ -0,0 +1,288 @@
+#include "sqlite.h"
+
+#include <util/generic/singleton.h>
+#include <util/generic/scope.h>
+
+#include <cstdlib>
+
+using namespace NSQLite;
+
+namespace {
+ // SQLite library configuration performed in the constructor; Ensure()
+ // obtains the instance through Singleton<>.
+ struct TSQLiteInit {
+ inline TSQLiteInit() {
+ // Select SQLite's SQLITE_CONFIG_MULTITHREAD threading mode.
+ int ret = sqlite3_config(SQLITE_CONFIG_MULTITHREAD);
+
+ // Any status other than SQLITE_OK is reported as TSQLiteError.
+ if (ret != SQLITE_OK) {
+ ythrow TSQLiteError(ret) << "init failure";
+ }
+ }
+
+ // Triggers the configuration above; called by the TSQLiteDB constructors.
+ static inline void Ensure() {
+ Singleton<TSQLiteInit>();
+ }
+ };
+}
+
+namespace NSQLite {
+ TSQLiteError::TSQLiteError(sqlite3* hndl)
+ : ErrorCode(sqlite3_errcode(hndl))
+ {
+ *this << sqlite3_errmsg(hndl) << ". ";
+ }
+
+ TSQLiteError::TSQLiteError(int rc)
+ : ErrorCode(rc)
+ {
+ *this << sqlite3_errstr(rc) << " (" << rc << "). ";
+ }
+
+ // Opens the database at `path` with sqlite3_open().
+ // Throws TSQLiteError when SQLite returns a non-zero status.
+ TSQLiteDB::TSQLiteDB(const TString& path) {
+ TSQLiteInit::Ensure();
+
+ sqlite3* db = nullptr;
+ const int rc = sqlite3_open(path.data(), &db);
+
+ // Take ownership before checking rc: the handle returned by a failed
+ // open is then still held by H_ and used for the error message below.
+ H_.Reset(db);
+
+ if (rc) {
+ ythrow TSQLiteError(Handle()) << "can not init db " << path.Quote();
+ }
+ }
+
+ // Opens the database at `path` with sqlite3_open_v2(); `flags` is passed
+ // through unchanged and no VFS name is supplied (nullptr).
+ // Throws TSQLiteError when SQLite returns a non-zero status.
+ TSQLiteDB::TSQLiteDB(const TString& path, int flags) {
+ TSQLiteInit::Ensure();
+
+ sqlite3* db = nullptr;
+ const int rc = sqlite3_open_v2(path.data(), &db, flags, nullptr);
+
+ // Take ownership before checking rc: the handle returned by a failed
+ // open is then still held by H_ and used for the error message below.
+ H_.Reset(db);
+
+ if (rc) {
+ ythrow TSQLiteError(Handle()) << "can not init db " << path.Quote();
+ }
+ }
+
+ sqlite3* TSQLiteDB::Handle() const noexcept {
+ return H_.Get();
+ }
+
+ size_t TSQLiteDB::RowsAffected() const noexcept {
+ return static_cast<size_t>(sqlite3_changes(H_.Get()));
+ }
+
+ // Compiles the SQL text `s` against `db`.
+ // A private copy of the text is kept in S_; a trailing ';' is appended when
+ // the non-empty text does not already end with one.
+ // Throws TSQLiteError if sqlite3_prepare_v2() does not return SQLITE_OK.
+ TSQLiteStatement::TSQLiteStatement(TSQLiteDB& db, const TString& s)
+ : S_(s)
+ {
+ if (!S_.empty() && S_[S_.size() - 1] != ';') {
+ S_ += ';';
+ }
+
+ sqlite3_stmt* st = nullptr;
+ const char* tail = nullptr;
+ // The byte count is size() + 1, i.e. it includes the terminating NUL.
+ const int rc = sqlite3_prepare_v2(db.Handle(), S_.data(), S_.size() + 1, &st, &tail);
+
+ // Hand the statement to H_ before the status check.
+ H_.Reset(st);
+
+ if (rc != SQLITE_OK) {
+ ythrow TSQLiteError(db.Handle()) << "can not prepare " << S_.Quote();
+ }
+ }
+
+ void TSQLiteStatement::Execute() {
+ while (Step()) {
+ }
+
+ Reset();
+ }
+
+ TSQLiteStatement& TSQLiteStatement::Bind(size_t idx, i64 val) {
+ sqlite3_bind_int64(Handle(), idx, val);
+ return *this;
+ }
+
+ TSQLiteStatement& TSQLiteStatement::Bind(size_t idx, int val) {
+ sqlite3_bind_int(Handle(), idx, val);
+ return *this;
+ }
+
+ TSQLiteStatement& TSQLiteStatement::Bind(size_t idx) {
+ sqlite3_bind_null(Handle(), idx);
+ return *this;
+ }
+
+ TSQLiteStatement& TSQLiteStatement::Bind(size_t idx, double val) {
+ sqlite3_bind_double(Handle(), idx, val);
+ return *this;
+ }
+
+ void TSQLiteStatement::BindText(size_t idx, const char* text, size_t len, TFreeFunc func) {
+ sqlite3_bind_text(Handle(), idx, text, len, func);
+ }
+
+ // Binds `str` as a text parameter at position `idx`.
+ // The buffer is bound with SQLITE_STATIC, which tells SQLite not to copy it:
+ // the memory behind `str` must stay valid and unchanged for as long as the
+ // binding is in use. The status returned by SQLite is not checked.
+ TSQLiteStatement& TSQLiteStatement::Bind(size_t idx, TStringBuf str) {
+ BindText(idx, str.data(), str.size(), SQLITE_STATIC);
+ return *this;
+ }
+
+ // Binds `blob` as a BLOB parameter at position `idx`.
+ // The buffer is bound with SQLITE_STATIC, which tells SQLite not to copy it:
+ // the memory behind `blob` must stay valid and unchanged for as long as the
+ // binding is in use. The status returned by SQLite is not checked.
+ TSQLiteStatement& TSQLiteStatement::BindBlob(size_t idx, TStringBuf blob) {
+ sqlite3_bind_blob(Handle(), idx, blob.data(), blob.size(), SQLITE_STATIC);
+ return *this;
+ }
+
+ // Returns the index of the bind parameter called `name`, as reported by
+ // sqlite3_bind_parameter_index() (0 means no such parameter).
+ size_t TSQLiteStatement::BoundNamePosition(TStringBuf name) const noexcept {
+     // sqlite3_bind_parameter_index() reads a NUL-terminated string, but a
+     // TStringBuf may view a fragment of a larger buffer and carries no
+     // terminator guarantee, so look the name up through a terminated copy.
+     const TString terminated(name);
+     return sqlite3_bind_parameter_index(Handle(), terminated.c_str());
+ }
+
+ size_t TSQLiteStatement::BoundParameterCount() const noexcept {
+ return sqlite3_bind_parameter_count(Handle());
+ }
+
+ const char* TSQLiteStatement::BoundParameterName(size_t idx) const noexcept {
+ return sqlite3_bind_parameter_name(Handle(), idx);
+ }
+
+ sqlite3_stmt* TSQLiteStatement::Handle() const noexcept {
+ return H_.Get();
+ }
+
+ // Advances the statement by one sqlite3_step() call.
+ // Returns true when a row is available (SQLITE_ROW) and false when the
+ // statement has finished (SQLITE_DONE). Any other status is thrown as
+ // TSQLiteError.
+ bool TSQLiteStatement::Step() {
+ const int rc = sqlite3_step(Handle());
+
+ switch (rc) {
+ case SQLITE_ROW:
+ return true;
+
+ case SQLITE_DONE:
+ return false;
+
+ default:
+ break;
+ }
+
+ // For constraint violations only, add the expanded SQL text to the error
+ // message. The string comes from sqlite3_expanded_sql() and is
+ // released with sqlite3_free() when the scope is left, including via throw.
+ char* stmt = rc == SQLITE_CONSTRAINT ? sqlite3_expanded_sql(Handle()) : nullptr;
+ Y_DEFER {
+ if (stmt != nullptr) {
+ sqlite3_free(reinterpret_cast<void*>(stmt));
+ stmt = nullptr;
+ }
+ };
+ if (stmt != nullptr) {
+ ythrow TSQLiteError(rc) << "step failed: " << stmt;
+ } else {
+ ythrow TSQLiteError(rc) << "step failed";
+ }
+ }
+
+ i64 TSQLiteStatement::ColumnInt64(size_t idx) {
+ return sqlite3_column_int64(Handle(), idx);
+ }
+
+ double TSQLiteStatement::ColumnDouble(size_t idx) {
+ return sqlite3_column_double(Handle(), idx);
+ }
+
+ // Returns the text value of column `idx` of the current row.
+ // The returned view points into memory owned by SQLite.
+ // NOTE(review): per the SQLite documentation the pointer can be invalidated
+ // by the next step/reset/finalize or a type conversion on the column.
+ TStringBuf TSQLiteStatement::ColumnText(size_t idx) {
+     // Fetch the text first, then its byte length (the order the SQLite
+     // documentation recommends). Taking the length from SQLite instead of
+     // scanning for a terminator keeps values with embedded NUL characters
+     // intact, matching Bind(TStringBuf), which binds with an explicit length.
+     const char* text = reinterpret_cast<const char*>(sqlite3_column_text(Handle(), idx));
+     const size_t size = sqlite3_column_bytes(Handle(), idx);
+     return TStringBuf(text, size);
+ }
+
+ TStringBuf TSQLiteStatement::ColumnBlob(size_t idx) {
+ const void* blob = sqlite3_column_blob(Handle(), idx);
+ size_t size = sqlite3_column_bytes(Handle(), idx);
+ return TStringBuf(static_cast<const char*>(blob), size);
+ }
+
+ void TSQLiteStatement::ColumnAccept(size_t idx, ISQLiteColumnVisitor& visitor) {
+ const auto columnType = sqlite3_column_type(Handle(), idx);
+ switch (columnType) {
+ case SQLITE_INTEGER:
+ visitor.OnColumnInt64(ColumnInt64(idx));
+ break;
+ case SQLITE_FLOAT:
+ visitor.OnColumnDouble(ColumnDouble(idx));
+ break;
+ case SQLITE_TEXT:
+ visitor.OnColumnText(ColumnText(idx));
+ break;
+ case SQLITE_BLOB:
+ visitor.OnColumnBlob(ColumnBlob(idx));
+ break;
+ case SQLITE_NULL:
+ visitor.OnColumnNull();
+ break;
+ }
+ }
+
+ size_t TSQLiteStatement::ColumnCount() const noexcept {
+ return static_cast<size_t>(sqlite3_column_count(Handle()));
+ }
+
+ TStringBuf TSQLiteStatement::ColumnName(size_t idx) const noexcept {
+ return sqlite3_column_name(Handle(), idx);
+ }
+
+ void TSQLiteStatement::Reset() {
+ const int rc = sqlite3_reset(Handle());
+
+ if (rc != SQLITE_OK) {
+ ythrow TSQLiteError(rc) << "reset failed";
+ }
+ }
+
+ void TSQLiteStatement::ResetHard() {
+ (void)sqlite3_reset(Handle());
+ }
+
+ void TSQLiteStatement::ClearBindings() noexcept {
+ // No error is documented.
+ // sqlite3.c's code always returns SQLITE_OK.
+ (void)sqlite3_clear_bindings(Handle());
+ }
+
+ TSQLiteTransaction::TSQLiteTransaction(TSQLiteDB& db)
+ : Db(&db)
+ {
+ Execute("BEGIN TRANSACTION");
+ }
+
+ // Rolls the transaction back if neither Commit() nor Rollback() has cleared Db.
+ // NOTE(review): Rollback() executes a statement and can throw; an exception
+ // leaving a destructor normally terminates the program - confirm that this
+ // is the intended behaviour.
+ TSQLiteTransaction::~TSQLiteTransaction() {
+ if (Db) {
+ Rollback();
+ }
+ }
+
+ void TSQLiteTransaction::Commit() {
+ Execute("COMMIT TRANSACTION");
+ Db = nullptr;
+ }
+
+ void TSQLiteTransaction::Rollback() {
+ Execute("ROLLBACK TRANSACTION");
+ Db = nullptr;
+ }
+
+ void TSQLiteTransaction::Execute(const TString& query) {
+ Y_ENSURE(Db, "Transaction is already ended");
+ TSQLiteStatement st(*Db, query);
+ st.Execute();
+ }
+
+ TSimpleDB::TSimpleDB(const TString& path)
+ : TSQLiteDB(path)
+ , Start_(*this, "begin transaction")
+ , End_(*this, "end transaction")
+ {
+ }
+
+ void TSimpleDB::Execute(const TString& statement) {
+ TSQLiteStatement(*this, statement).Execute();
+ }
+
+ void TSimpleDB::Acquire() {
+ Start_.Execute();
+ }
+
+ void TSimpleDB::Release() {
+ End_.Execute();
+ }
+
+}
diff --git a/library/cpp/sqlite3/sqlite.h b/library/cpp/sqlite3/sqlite.h
new file mode 100644
index 0000000000..8b35e2606a
--- /dev/null
+++ b/library/cpp/sqlite3/sqlite.h
@@ -0,0 +1,136 @@
+#pragma once
+
+#include <util/generic/yexception.h>
+#include <util/generic/ptr.h>
+
+#include <contrib/libs/sqlite3/sqlite3.h>
+
+namespace NSQLite {
+ class TSQLiteError: public yexception {
+ public:
+ TSQLiteError(sqlite3* hndl);
+ TSQLiteError(int rc);
+
+ int GetErrorCode() const {
+ return ErrorCode;
+ }
+
+ private:
+ int ErrorCode;
+ };
+
+ template <class T, int (*Func)(T*)>
+ struct TCFree {
+ static void Destroy(T* t) {
+ Func(t);
+ }
+ };
+
+ class TSQLiteDB {
+ public:
+ TSQLiteDB(const TString& path, int flags);
+ TSQLiteDB(const TString& path);
+
+ sqlite3* Handle() const noexcept;
+ size_t RowsAffected() const noexcept;
+
+ private:
+ THolder<sqlite3, TCFree<sqlite3, sqlite3_close>> H_;
+ };
+
+ class ISQLiteColumnVisitor {
+ public:
+ virtual ~ISQLiteColumnVisitor() = default;
+
+ virtual void OnColumnInt64(i64 value) = 0;
+ virtual void OnColumnDouble(double value) = 0;
+ virtual void OnColumnText(TStringBuf value) = 0;
+ virtual void OnColumnBlob(TStringBuf value) = 0;
+ virtual void OnColumnNull() = 0;
+ };
+
+ class TSQLiteStatement {
+ public:
+ TSQLiteStatement(TSQLiteDB& db, const TString& s);
+
+ void Execute();
+ TSQLiteStatement& Bind(size_t idx, i64 val);
+ TSQLiteStatement& Bind(size_t idx, int val);
+ TSQLiteStatement& Bind(size_t idx);
+ TSQLiteStatement& Bind(size_t idx, double val);
+ TSQLiteStatement& Bind(size_t idx, TStringBuf str);
+ TSQLiteStatement& BindBlob(size_t idx, TStringBuf blob);
+ template <typename Value>
+ TSQLiteStatement& Bind(TStringBuf name, Value val) {
+ size_t idx = BoundNamePosition(name);
+ Y_ASSERT(idx > 0);
+ return Bind(idx, val);
+ }
+ TSQLiteStatement& BindBlob(TStringBuf name, TStringBuf blob) {
+ size_t idx = BoundNamePosition(name);
+ Y_ASSERT(idx > 0);
+ return BindBlob(idx, blob);
+ }
+ TSQLiteStatement& Bind(TStringBuf name) {
+ size_t idx = BoundNamePosition(name);
+ Y_ASSERT(idx > 0);
+ return Bind(idx);
+ }
+ size_t BoundNamePosition(TStringBuf name) const noexcept;
+ size_t BoundParameterCount() const noexcept;
+ const char* BoundParameterName(size_t idx) const noexcept;
+
+ sqlite3_stmt* Handle() const noexcept;
+ bool Step();
+ i64 ColumnInt64(size_t idx);
+ double ColumnDouble(size_t idx);
+ TStringBuf ColumnText(size_t idx);
+ TStringBuf ColumnBlob(size_t idx);
+ void ColumnAccept(size_t idx, ISQLiteColumnVisitor& visitor);
+ size_t ColumnCount() const noexcept;
+ TStringBuf ColumnName(size_t idx) const noexcept;
+ void Reset();
+ // Ignore last error on this statement
+ void ResetHard();
+ void ClearBindings() noexcept;
+
+ private:
+ typedef void (*TFreeFunc)(void*);
+ void BindText(size_t col, const char* text, size_t len, TFreeFunc func);
+
+ private:
+ TString S_;
+ THolder<sqlite3_stmt, TCFree<sqlite3_stmt, sqlite3_finalize>> H_;
+ };
+
+ /**
+ * Forces user to commit transaction explicitly, to not get exception in destructor (with all consequences of it).
+ */
+ class TSQLiteTransaction: private TNonCopyable {
+ private:
+ TSQLiteDB* Db;
+
+ public:
+ TSQLiteTransaction(TSQLiteDB& db);
+ ~TSQLiteTransaction();
+
+ void Commit();
+ void Rollback();
+
+ private:
+ void Execute(const TString& query);
+ };
+
+ class TSimpleDB: public TSQLiteDB {
+ public:
+ TSimpleDB(const TString& path);
+
+ void Execute(const TString& statement);
+ void Acquire();
+ void Release();
+
+ private:
+ TSQLiteStatement Start_;
+ TSQLiteStatement End_;
+ };
+}
diff --git a/library/cpp/sqlite3/ya.make b/library/cpp/sqlite3/ya.make
new file mode 100644
index 0000000000..15417e278d
--- /dev/null
+++ b/library/cpp/sqlite3/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+
+SRCS(
+ sqlite.cpp
+)
+
+PEERDIR(
+ contrib/libs/sqlite3
+)
+
+END()
+
+RECURSE_FOR_TESTS(ut)
diff --git a/library/cpp/streams/growing_file_input/growing_file_input.cpp b/library/cpp/streams/growing_file_input/growing_file_input.cpp
new file mode 100644
index 0000000000..0bbfa5ade9
--- /dev/null
+++ b/library/cpp/streams/growing_file_input/growing_file_input.cpp
@@ -0,0 +1,40 @@
+#include "growing_file_input.h"
+
+#include <util/datetime/base.h>
+#include <util/generic/yexception.h>
+
+TGrowingFileInput::TGrowingFileInput(const TString& path)
+ : File_(path, OpenExisting | RdOnly | Seq)
+{
+ if (!File_.IsOpen()) {
+ ythrow TIoException() << "file " << path << " not open";
+ }
+
+ File_.Seek(0, sEnd);
+}
+
+TGrowingFileInput::TGrowingFileInput(const TFile& file)
+ : File_(file)
+{
+ if (!File_.IsOpen()) {
+ ythrow TIoException() << "file (" << file.GetName() << ") not open";
+ }
+
+ File_.Seek(0, sEnd);
+}
+
+// Reads up to `len` bytes into `buf`, waiting until the file yields at least
+// one byte. Returns the number of bytes read; returns 0 only for a zero-length
+// request.
+size_t TGrowingFileInput::DoRead(void* buf, size_t len) {
+    // A zero-length request can never produce a byte; without this guard the
+    // polling loop below would sleep and retry forever.
+    if (len == 0) {
+        return 0;
+    }
+
+    // Poll with a doubling delay: sleepTime starts at 1 and stops growing once
+    // it reaches 2048; it is multiplied by 1000000 before being passed to
+    // NanoSleep.
+    for (int sleepTime = 1;;) {
+        const size_t rr = File_.Read(buf, len);
+
+        if (rr != 0) {
+            return rr;
+        }
+
+        NanoSleep((ui64)sleepTime * 1000000);
+
+        if (sleepTime < 2000) {
+            sleepTime <<= 1;
+        }
+    }
+}
diff --git a/library/cpp/streams/growing_file_input/growing_file_input.h b/library/cpp/streams/growing_file_input/growing_file_input.h
new file mode 100644
index 0000000000..9054a5f3da
--- /dev/null
+++ b/library/cpp/streams/growing_file_input/growing_file_input.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <util/stream/input.h>
+#include <util/system/file.h>
+
+/**
+ * Input stream over a file that is still being appended to.
+ *
+ * The file position is moved to the end of the file when the object is
+ * created.
+ *
+ * A read waits until at least one byte has been read.
+ */
+class TGrowingFileInput: public IInputStream {
+public:
+ TGrowingFileInput(const TFile& file);
+ TGrowingFileInput(const TString& path);
+
+private:
+ size_t DoRead(void* buf, size_t len) override;
+
+private:
+ TFile File_;
+};
diff --git a/library/cpp/streams/growing_file_input/ya.make b/library/cpp/streams/growing_file_input/ya.make
new file mode 100644
index 0000000000..69c56fea46
--- /dev/null
+++ b/library/cpp/streams/growing_file_input/ya.make
@@ -0,0 +1,11 @@
+LIBRARY()
+
+SRCS(
+ growing_file_input.cpp
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut
+)
diff --git a/library/cpp/string_utils/subst_buf/substbuf.cpp b/library/cpp/string_utils/subst_buf/substbuf.cpp
new file mode 100644
index 0000000000..f23cb24b19
--- /dev/null
+++ b/library/cpp/string_utils/subst_buf/substbuf.cpp
@@ -0,0 +1 @@
+#include "substbuf.h"
diff --git a/library/cpp/string_utils/subst_buf/substbuf.h b/library/cpp/string_utils/subst_buf/substbuf.h
new file mode 100644
index 0000000000..357ee68ae3
--- /dev/null
+++ b/library/cpp/string_utils/subst_buf/substbuf.h
@@ -0,0 +1,63 @@
+#pragma once
+
+#include <util/generic/vector.h>
+#include <util/generic/strbuf.h>
+#include <util/string/subst.h>
+
+/// Replaces every occurrence of the substring `from` in `s` with `to`.
+///
+/// The result is built in memory obtained from `pool` and `s` is rebound to
+/// it; when nothing is replaced `s` is left untouched. Returns the number of
+/// replacements (0 when `from` is empty or does not occur).
+template <class TBuf, class TPool>
+size_t SubstGlobal(TBuf& s, const TBuf& from, const TBuf& to, TPool& pool) {
+ if (from.empty())
+ return 0;
+
+ // Collect the offsets of all non-overlapping matches, left to right.
+ TVector<size_t> offs;
+ for (size_t off = 0; (off = s.find(from, off)) != TBuf::npos; off += from.length())
+ offs.push_back(off);
+ if (offs.empty())
+ return 0;
+
+ // Each replacement changes the length by to.size() - from.size(), which may be negative.
+ size_t dstSize = s.size() + ssize_t(offs.size()) * ssize_t(to.size() - from.size());
+ const size_t charTypeSz = sizeof(typename TBuf::char_type);
+ // One extra character is allocated so the result can be zero-terminated.
+ typename TBuf::char_type* dst = (typename TBuf::char_type*)pool.Allocate((dstSize + 1) * charTypeSz);
+ dst[dstSize] = 0;
+
+ typename TBuf::char_type* p = dst;
+ size_t lastSrc = 0;
+ for (auto off : offs) {
+ // Copy the unchanged text between the previous match and this one, then the replacement.
+ memcpy(p, s.data() + lastSrc, (off - lastSrc) * charTypeSz);
+ p += off - lastSrc;
+ lastSrc = off + from.size();
+ memcpy(p, to.data(), to.size() * charTypeSz);
+ p += to.size();
+ }
+ // Copy the tail that follows the last match.
+ memcpy(p, s.data() + lastSrc, (s.size() - lastSrc) * charTypeSz);
+ p += s.size() - lastSrc;
+ Y_ASSERT(p - dst == (ssize_t)dstSize);
+
+ s = TBuf(dst, dstSize);
+ return offs.size();
+}
+
+template <class TPool>
+size_t SubstGlobal(TStringBuf& s, const TStringBuf& from, const TStringBuf& to, TPool& pool) {
+ return SubstGlobal<TStringBuf, TPool>(s, from, to, pool);
+}
+
+/// Replaces every occurrence of the character `from` in `s` with `to`.
+///
+/// When `from` occurs at least once, `s` is rebound to the pointer returned by
+/// pool.Append(s) and the characters are changed there; otherwise `s` is left
+/// untouched. Returns the number of characters replaced.
+template <class TBuf, class TPool>
+inline size_t SubstGlobal(TBuf& s, typename TBuf::char_type from, typename TBuf::char_type to, TPool& pool) {
+ size_t result = 0;
+ size_t off = s.find(from);
+ if (off == TBuf::npos)
+ return 0;
+
+ // presumably Append() copies the data of s into the pool and returns the copy - confirm against TPool
+ s = TBuf(pool.Append(s), s.size());
+
+ // Start at the first match; the const_cast gives write access to the memory returned by the pool.
+ for (typename TBuf::char_type* it = const_cast<typename TBuf::char_type*>(s.begin()) + off; it != s.end(); ++it) {
+ if (*it == from) {
+ *it = to;
+ ++result;
+ }
+ }
+ return result;
+}
diff --git a/library/cpp/string_utils/subst_buf/ya.make b/library/cpp/string_utils/subst_buf/ya.make
new file mode 100644
index 0000000000..8b8793f5b3
--- /dev/null
+++ b/library/cpp/string_utils/subst_buf/ya.make
@@ -0,0 +1,7 @@
+LIBRARY()
+
+SRCS(
+ substbuf.cpp
+)
+
+END()
diff --git a/library/cpp/ucompress/README.md b/library/cpp/ucompress/README.md
new file mode 100644
index 0000000000..5a6e9d8f42
--- /dev/null
+++ b/library/cpp/ucompress/README.md
@@ -0,0 +1 @@
+Compatible implementation of library/python/compress (also known as "uc" - uber compressor: tools/uc, ya tool uc).
diff --git a/library/cpp/ucompress/common.h b/library/cpp/ucompress/common.h
new file mode 100644
index 0000000000..d59cde9cf1
--- /dev/null
+++ b/library/cpp/ucompress/common.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#include <util/system/types.h>
+
+namespace NUCompress {
+    // These limitations come from original implementation - library/python/compress
+    // TBlockLen is the type of the length prefix that precedes every block in the stream.
+    using TBlockLen = ui32;
+    // Largest accepted value of a length prefix.
+    constexpr TBlockLen MaxCompressedLen = 100000000;
+}
diff --git a/library/cpp/ucompress/reader.cpp b/library/cpp/ucompress/reader.cpp
new file mode 100644
index 0000000000..45a8ca8da2
--- /dev/null
+++ b/library/cpp/ucompress/reader.cpp
@@ -0,0 +1,58 @@
+#include "reader.h"
+#include "common.h"
+
+#include <library/cpp/blockcodecs/codecs.h>
+#include <library/cpp/json/json_reader.h>
+
+#include <util/system/byteorder.h>
+
+
+using namespace NUCompress;
+
+// Wraps the input stream `in`; the stream pointer is stored, not owned.
+// Throws TBadArgumentException when `in` is null.
+TDecodedInput::TDecodedInput(IInputStream* in)
+    : S_(in)
+{
+    // The argument is an input stream, so say so in the diagnostic.
+    Y_ENSURE_EX(S_, TBadArgumentException() << "Null input stream");
+}
+
+TDecodedInput::~TDecodedInput() = default;
+
+// Decodes the next block of the stream into D_ and exposes it through `ptr`.
+// Returns the size of the decoded data, or 0 when the underlying stream has
+// no more bytes. Throws on a malformed stream.
+size_t TDecodedInput::DoUnboundedNext(const void** ptr) {
+ // First call: read the length-prefixed JSON header and pick the codec it names.
+ if (!C_) {
+ TBlockLen blockLen = 0;
+ S_->LoadOrFail(&blockLen, sizeof(blockLen));
+ // Length prefixes are stored little-endian.
+ blockLen = LittleToHost(blockLen);
+ Y_ENSURE(blockLen <= MaxCompressedLen, "broken stream");
+
+ TString buf = TString::Uninitialized(blockLen);
+ S_->LoadOrFail(buf.Detach(), blockLen);
+
+ NJson::TJsonValue hdr;
+ Y_ENSURE(NJson::ReadJsonTree(buf, &hdr), "cannot parse header, suspect old format");
+
+ auto& codecName = hdr["codec"].GetString();
+ Y_ENSURE(codecName, "header does not have codec info");
+
+ // Throws TNotFound
+ C_ = NBlockCodecs::Codec(codecName);
+ Y_ASSERT(C_);
+ }
+
+ // Data block: a length prefix followed by that many bytes of compressed data.
+ TBlockLen blockLen = 0;
+ size_t actualRead = S_->Load(&blockLen, sizeof(blockLen));
+ if (!actualRead) {
+ // End of stream
+ return 0;
+ }
+ // A partially read prefix means the stream was cut off.
+ Y_ENSURE(actualRead == sizeof(blockLen), "broken stream: cannot read block length");
+ blockLen = LittleToHost(blockLen);
+ Y_ENSURE(blockLen <= MaxCompressedLen, "broken stream");
+
+ TBuffer block;
+ block.Resize(blockLen);
+ S_->LoadOrFail(block.Data(), blockLen);
+
+ // The decoded bytes live in D_, so the pointer handed out stays valid
+ // until D_ is modified by the next call.
+ C_->Decode(block, D_);
+ *ptr = D_.Data();
+ return D_.Size();
+}
diff --git a/library/cpp/ucompress/reader.h b/library/cpp/ucompress/reader.h
new file mode 100644
index 0000000000..5a5d1c9a89
--- /dev/null
+++ b/library/cpp/ucompress/reader.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include <util/generic/buffer.h>
+#include <util/stream/walk.h>
+
+
+namespace NBlockCodecs {
+ struct ICodec;
+}
+
+namespace NUCompress {
+ class TDecodedInput: public IWalkInput {
+ public:
+ TDecodedInput(IInputStream* in);
+ ~TDecodedInput() override;
+
+ private:
+ size_t DoUnboundedNext(const void** ptr) override;
+
+ private:
+ IInputStream* const S_;
+ const NBlockCodecs::ICodec* C_ = nullptr;
+ TBuffer D_;
+ };
+}
diff --git a/library/cpp/ucompress/writer.cpp b/library/cpp/ucompress/writer.cpp
new file mode 100644
index 0000000000..40f8b12108
--- /dev/null
+++ b/library/cpp/ucompress/writer.cpp
@@ -0,0 +1,95 @@
+#include "writer.h"
+#include "common.h"
+
+#include <library/cpp/blockcodecs/codecs.h>
+#include <library/cpp/json/writer/json.h>
+
+#include <util/generic/scope.h>
+#include <util/generic/yexception.h>
+#include <util/system/byteorder.h>
+
+
+using namespace NUCompress;
+
+// Creates a compressing stream that writes to `out` using codec `c` and
+// accumulates up to `bufLen` bytes of input per block.
+// Throws TBadArgumentException when the codec or stream is null, when
+// `bufLen` is zero, or when a full buffer could compress to more than
+// MaxCompressedLen bytes.
+TCodedOutput::TCodedOutput(IOutputStream* out, const NBlockCodecs::ICodec* c, size_t bufLen)
+    : C_(c)
+    , D_(bufLen)
+    , S_(out)
+{
+    Y_ENSURE_EX(C_, TBadArgumentException() << "Null codec");
+    Y_ENSURE_EX(S_, TBadArgumentException() << "Null output stream");
+    // With an empty buffer DoWrite() can never consume any input: it would
+    // flush empty blocks in an endless loop. Reject the value up front.
+    Y_ENSURE_EX(bufLen, TBadArgumentException() << "Zero buffer size");
+    // Temporarily give the buffer its full size so the codec can report the
+    // worst-case compressed length for it, then empty it again.
+    D_.Resize(bufLen);
+    Y_ENSURE_EX(C_->MaxCompressedLength(D_) <= MaxCompressedLen, TBadArgumentException() << "Too big buffer size: " << bufLen);
+    D_.Clear();
+}
+
+TCodedOutput::~TCodedOutput() {
+ try {
+ Finish();
+ } catch (...) {
+ }
+}
+
+// Appends `len` bytes from `buf` to the pending buffer D_, emitting a
+// compressed block each time the buffer becomes full.
+// Throws if the stream has already been finished (S_ is null).
+void TCodedOutput::DoWrite(const void* buf, size_t len) {
+ Y_ENSURE(S_, "Stream finished already");
+ const char* in = static_cast<const char*>(buf);
+
+ while (len) {
+ const size_t avail = D_.Avail();
+ // Everything left fits with room to spare: just buffer it.
+ if (len < avail) {
+ D_.Append(in, len);
+ return;
+ }
+
+ // Fill the buffer completely, then flush it as one block.
+ D_.Append(in, avail);
+ Y_ASSERT(!D_.Avail());
+ in += avail;
+ len -= avail;
+
+ FlushImpl();
+ }
+}
+
+// Writes the stream header (first call only) followed by one block holding
+// the compressed contents of D_, then empties both working buffers.
+// Every header/block is preceded by its length as a little-endian TBlockLen.
+void TCodedOutput::FlushImpl() {
+    if (!HdrWritten) {
+        // The header is a one-line JSON object naming the codec.
+        NJsonWriter::TBuf jBuf;
+        jBuf.BeginObject();
+        jBuf.WriteKey("codec");
+        jBuf.WriteString(C_->Name());
+        jBuf.EndObject();
+
+        TString jStr = jBuf.Str() + '\n';
+        // Narrow to the on-wire type BEFORE the byte-order conversion.
+        // Converting the wider size_t first and truncating afterwards keeps
+        // the wrong half of the swapped value on big-endian hosts.
+        const TBlockLen lenToSave = HostToLittle(static_cast<TBlockLen>(jStr.length()));
+        S_->Write(&lenToSave, sizeof(lenToSave));
+        S_->Write(jStr.Detach(), jStr.length());
+        HdrWritten = true;
+    }
+
+    O_.Reserve(C_->MaxCompressedLength(D_));
+    const size_t oLen = C_->Compress(D_, O_.Data());
+    Y_ASSERT(oLen <= MaxCompressedLen);
+
+    // Same narrowing-before-conversion rule as for the header length.
+    const TBlockLen lenToSave = HostToLittle(static_cast<TBlockLen>(oLen));
+    S_->Write(&lenToSave, sizeof(lenToSave));
+    S_->Write(O_.Data(), oLen);
+
+    D_.Clear();
+    O_.Clear();
+}
+
+void TCodedOutput::DoFlush() {
+ if (S_ && D_) {
+ FlushImpl();
+ }
+}
+
+void TCodedOutput::DoFinish() {
+ if (S_) {
+ Y_DEFER {
+ S_ = nullptr;
+ };
+ FlushImpl();
+ // Write zero-length block as EOF marker.
+ FlushImpl();
+ }
+}
diff --git a/library/cpp/ucompress/writer.h b/library/cpp/ucompress/writer.h
new file mode 100644
index 0000000000..4d3ae71093
--- /dev/null
+++ b/library/cpp/ucompress/writer.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <util/generic/buffer.h>
+#include <util/stream/output.h>
+
+
+namespace NBlockCodecs {
+ struct ICodec;
+}
+
+namespace NUCompress {
+ class TCodedOutput: public IOutputStream {
+ public:
+ TCodedOutput(IOutputStream* out, const NBlockCodecs::ICodec* c, size_t bufLen = 16 << 20);
+ ~TCodedOutput() override;
+
+ private:
+ void DoWrite(const void* buf, size_t len) override;
+ void DoFlush() override;
+ void DoFinish() override;
+
+ void FlushImpl();
+
+ private:
+ const NBlockCodecs::ICodec* const C_;
+ TBuffer D_;
+ TBuffer O_;
+ IOutputStream* S_;
+ bool HdrWritten = false;
+ };
+}
diff --git a/library/cpp/ucompress/ya.make b/library/cpp/ucompress/ya.make
new file mode 100644
index 0000000000..6582dd9a41
--- /dev/null
+++ b/library/cpp/ucompress/ya.make
@@ -0,0 +1,18 @@
+LIBRARY()
+
+PEERDIR(
+ library/cpp/blockcodecs
+ library/cpp/json
+)
+
+SRCS(
+ reader.cpp
+ writer.cpp
+)
+
+END()
+
+RECURSE(
+ tests
+ ut
+)
diff --git a/library/cpp/zipatch/reader.cpp b/library/cpp/zipatch/reader.cpp
new file mode 100644
index 0000000000..03ac365da1
--- /dev/null
+++ b/library/cpp/zipatch/reader.cpp
@@ -0,0 +1,173 @@
+#include "reader.h"
+
+#include <library/cpp/json/json_reader.h>
+#include <library/cpp/json/json_value.h>
+
+#include <util/generic/hash.h>
+#include <util/memory/tempbuf.h>
+
+#include <contrib/libs/libarchive/libarchive/archive.h>
+#include <contrib/libs/libarchive/libarchive/archive_entry.h>
+
+using namespace NJson;
+
+namespace NZipatch {
+
+class TReader::TImpl {
+
+ using TEntry = archive_entry;
+
+public:
+ TImpl() {
+ if ((Archive_ = archive_read_new()) == nullptr) {
+ ythrow yexception() << "can't create archive object";
+ }
+ }
+
+ TImpl(const TFsPath& path)
+ : TImpl()
+ {
+ archive_read_support_filter_all(Archive_);
+ archive_read_support_format_zip(Archive_);
+
+ if (ARCHIVE_OK != archive_read_open_filename(Archive_, TString(path).c_str(), 10240)) {
+ ythrow yexception() << "can't open archive path = " << path;
+ }
+
+ Read();
+ }
+
+ TImpl(const TStringBuf buf)
+ : TImpl()
+ {
+ archive_read_support_filter_all(Archive_);
+ archive_read_support_format_zip(Archive_);
+
+ if (ARCHIVE_OK != archive_read_open_memory(Archive_, buf.data(), buf.size())) {
+ ythrow yexception() << "can't open in-memory archive";
+ }
+
+ Read();
+ }
+
+ ~TImpl() {
+ for (const auto& item : Files_) {
+ archive_entry_free(item.second.first);
+ }
+ if (Archive_) {
+ archive_read_free(Archive_);
+ }
+ }
+
+ // Invokes `cb` once for every action read from actions.json, in file order.
+ // Actions with an empty path are skipped. Throws yexception when a
+ // store_file action refers to a file that is missing from the archive.
+ void Enumerate(TOnEvent cb) const {
+     for (const auto& item : Actions_) {
+         // Value-initialize so that fields which are only filled for some
+         // action types (e.g. Source.Revision) never hold indeterminate values.
+         TEvent event{};
+
+         event.Action = GetTypeFromString(item["type"].GetStringSafe(TString()));
+         event.Path = item["path"].GetStringSafe(TString());
+         event.Executable = item["executable"].GetBooleanSafe(false);
+         event.Symlink = false;
+
+         if (event.Action == Copy || event.Action == Move) {
+             event.Source.Path = item["orig_path"].GetStringSafe(TString());
+             event.Source.Revision = item["orig_revision"].GetUIntegerRobust();
+         }
+         if (event.Action == StoreFile) {
+             auto fi = Files_.find(event.Path);
+             if (fi == Files_.end()) {
+                 ythrow yexception() << "can't find file; path = " << event.Path;
+             }
+
+             // Data is a view into the string stored in Files_, so it is only
+             // valid while this reader object is alive.
+             event.Data = fi->second.second;
+             event.Symlink = archive_entry_filetype(fi->second.first) == AE_IFLNK;
+         }
+
+         if (event.Path) {
+             cb(event);
+         }
+     }
+ }
+
+private:
+ // Maps the "type" string of an action to EAction.
+ // Both "remove_file" and "remove_tree" map to Remove; unrecognized strings
+ // map to Unknown.
+ // NOTE(review): no string maps to EAction::Move, so Enumerate() never sees
+ // a Move action from this function - confirm whether that is intended.
+ EAction GetTypeFromString(const TString& type) const {
+ if (type == "store_file") {
+ return StoreFile;
+ }
+ if (type == "mkdir") {
+ return MkDir;
+ }
+ if (type == "remove_file" || type == "remove_tree") {
+ return Remove;
+ }
+ if (type == "svn_copy") {
+ return Copy;
+ }
+ return Unknown;
+ }
+
+ // Walks the archive once: actions.json is parsed into Actions_ and every
+ // entry under "files/" is stored in Files_ (key: path without the prefix,
+ // value: a clone of the entry header plus its data). The archive is closed
+ // afterwards. Iteration stops at the first header that is not ARCHIVE_OK.
+ void Read() {
+     TEntry* current = nullptr;
+
+     while (archive_read_next_header(Archive_, &current) == ARCHIVE_OK) {
+         const TStringBuf path(archive_entry_pathname(current));
+
+         if (path == "actions.json") {
+             TJsonValue value;
+             ReadJsonFastTree(GetData(current), &value, true);
+
+             for (const auto& item : value.GetArraySafe()) {
+                 Actions_.push_back(item);
+             }
+         } else if (AsciiHasPrefix(path, "files/")) {
+             // Read the payload before cloning the header: GetData() may
+             // throw, and a clone made earlier would then be leaked.
+             TString data = GetData(current);
+             TEntry* entry = archive_entry_clone(current);
+
+             // 6 == strlen("files/")
+             const bool inserted = Files_.emplace(path.substr(6), std::make_pair(entry, std::move(data))).second;
+             if (!inserted) {
+                 // The path is already present (the first entry wins); the
+                 // map did not take the clone, so release it here.
+                 archive_entry_free(entry);
+             }
+         }
+     }
+
+     archive_read_close(Archive_);
+ }
+
+ // Returns the payload of the archive entry `current`:
+ //  - for a symlink entry, the link target recorded in the header;
+ //  - otherwise, the entry data read from the archive, or an empty string
+ //    when the header reports size 0.
+ // Throws yexception when fewer bytes than the header size could be read.
+ TString GetData(TEntry* current) const {
+ if (archive_entry_filetype(current) == AE_IFLNK) {
+ return archive_entry_symlink(current);
+ }
+
+ // NOTE(review): the size comes straight from the entry header and is only
+ // tested for being non-zero - confirm it cannot be negative here.
+ if (const auto size = archive_entry_size(current)) {
+ TTempBuf data(size);
+
+ if (archive_read_data(Archive_, data.Data(), size) != size) {
+ ythrow yexception() << "can't read entry";
+ }
+
+ return TString(data.Data(), size);
+ }
+
+ return TString();
+ }
+
+private:
+ struct archive* Archive_;
+ TVector<TJsonValue> Actions_;
+ THashMap<TString, std::pair<TEntry*, TString>> Files_;
+};
+
+TReader::TReader(const TFsPath& path)
+ : Impl_(new TImpl(path))
+{
+}
+
+TReader::TReader(const TStringBuf buf)
+ : Impl_(new TImpl(buf))
+{
+}
+
+TReader::~TReader()
+{ }
+
+void TReader::Enumerate(TOnEvent cb) const {
+ Impl_->Enumerate(cb);
+}
+
+} // namespace NZipatch
+
diff --git a/library/cpp/zipatch/reader.h b/library/cpp/zipatch/reader.h
new file mode 100644
index 0000000000..a94bc79b71
--- /dev/null
+++ b/library/cpp/zipatch/reader.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include <util/folder/path.h>
+#include <util/generic/ptr.h>
+
+#include <functional>
+
+namespace NZipatch {
+
+class TReader {
+public:
+ enum EAction {
+ Unknown = 0,
+ Copy,
+ MkDir,
+ Move,
+ Remove,
+ StoreFile,
+ };
+
+ struct TSource {
+ TString Path;
+ ui64 Revision;
+ };
+
+ struct TEvent {
+ EAction Action;
+ TString Path;
+ TStringBuf Data;
+ TSource Source;
+ bool Executable;
+ bool Symlink;
+ };
+
+ using TOnEvent = std::function<void(const TEvent&)>;
+
+public:
+ TReader(const TFsPath& path);
+ TReader(const TStringBuf buf);
+ ~TReader();
+
+ void Enumerate(TOnEvent cb) const;
+
+private:
+ class TImpl;
+ THolder<TImpl> Impl_;
+};
+
+} // namespace NZipatch
+
diff --git a/library/cpp/zipatch/writer.cpp b/library/cpp/zipatch/writer.cpp
new file mode 100644
index 0000000000..a9ca451b01
--- /dev/null
+++ b/library/cpp/zipatch/writer.cpp
@@ -0,0 +1,232 @@
+#include "writer.h"
+
+#include <library/cpp/json/json_value.h>
+#include <library/cpp/json/json_writer.h>
+
+#include <util/string/join.h>
+
+#include <contrib/libs/libarchive/libarchive/archive.h>
+#include <contrib/libs/libarchive/libarchive/archive_entry.h>
+
+using namespace NJson;
+
+namespace NZipatch {
+
+class TWriter::TImpl {
+public:
+ // Creates a deflate-compressed zip archive at `path` and prepares empty
+ // JSON containers for actions, meta information and revision properties.
+ // Throws yexception when the archive object cannot be created or the file
+ // cannot be opened.
+ TImpl(const TFsPath& path)
+     : Actions_(new TJsonValue(JSON_ARRAY))
+     , Meta_(new TJsonValue(JSON_MAP))
+     , Revprops_(new TJsonValue(JSON_MAP))
+     , Archive_(nullptr)
+ {
+     Archive_ = archive_write_new();
+     if (!Archive_) {
+         ythrow yexception() << "can't create archive object";
+     }
+     archive_write_set_format_zip(Archive_);
+     archive_write_zip_set_compression_deflate(Archive_);
+
+     if (ARCHIVE_OK != archive_write_open_filename(Archive_, TString(path).c_str())) {
+         // The destructor does not run when a constructor throws, so the
+         // archive object has to be released here to avoid leaking it.
+         archive_write_free(Archive_);
+         Archive_ = nullptr;
+         ythrow yexception() << "can't open archive path = " << path;
+     }
+ }
+
+ ~TImpl() {
+ if (Actions_ || Meta_ || Revprops_) {
+ Finish();
+ }
+ if (Archive_) {
+ archive_write_free(Archive_);
+ }
+ }
+
+ void Finish() {
+ if (Actions_) {
+ if (Archive_) {
+ WriteEntry("actions.json", WriteJson(Actions_.Get(), true, false));
+ }
+
+ Actions_.Destroy();
+ }
+
+ if (Meta_) {
+ if (Archive_) {
+ WriteEntry("meta.json", WriteJson(Meta_.Get(), true));
+ }
+
+ Meta_.Destroy();
+ }
+
+ if (Revprops_) {
+ if (Archive_) {
+ WriteEntry("revprops.json", WriteJson(Revprops_.Get(), true));
+ }
+
+ Revprops_.Destroy();
+ }
+
+ if (Archive_) {
+ archive_write_close(Archive_);
+ }
+ }
+
+ void Copy(const TString& path, const TOrigin& origin) {
+ Y_ASSERT(origin.Path);
+ Y_ASSERT(origin.Revision);
+
+ if (Actions_) {
+ TJsonValue item;
+ item["type"] = "svn_copy";
+ item["path"] = path;
+ item["orig_path"] = origin.Path;
+ item["orig_revision"] = origin.Revision;
+ Actions_->AppendValue(item);
+ }
+ }
+
+ void MkDir(const TString& path) {
+ if (Actions_) {
+ TJsonValue item;
+ item["type"] = "mkdir";
+ item["path"] = path;
+ Actions_->AppendValue(item);
+ }
+ }
+
+ void RemoveFile(const TString& path) {
+ if (Actions_) {
+ TJsonValue item;
+ item["type"] = "remove_file";
+ item["path"] = path;
+ Actions_->AppendValue(item);
+ }
+ }
+
+ void RemoveTree(const TString& path) {
+ if (Actions_) {
+ TJsonValue item;
+ item["type"] = "remove_tree";
+ item["path"] = path;
+ Actions_->AppendValue(item);
+ }
+ }
+
+ void StoreFile(
+ const TString& path,
+ const TString& data,
+ const bool execute,
+ const bool symlink,
+ const TMaybe<bool> binaryHint,
+ const TMaybe<bool> encrypted)
+ {
+ if (Actions_) {
+ const TString file = Join("/", "files", path);
+ TJsonValue item;
+ item["type"] = "store_file";
+ item["executable"] = execute;
+ item["path"] = path;
+ item["file"] = file;
+ if (binaryHint.Defined()) {
+ item["binary_hint"] = *binaryHint;
+ }
+ if (encrypted.Defined()) {
+ item["encrypted"] = *encrypted;
+ }
+ Actions_->AppendValue(item);
+ WriteEntry(file, data, symlink);
+ }
+ }
+
+ void SetBaseSvnRevision(ui64 revision) {
+ if (Meta_) {
+ (*Meta_)["base_svn_revision"] = revision;
+ }
+ }
+
+ void AddRevprop(const TString& prop, const TString& value) {
+ if (Revprops_) {
+ (*Revprops_)[prop] = value;
+ }
+ }
+
+private:
+ // Adds one entry named `path` to the archive.
+ // For a regular file `data` is the file content; for a symlink `data` is
+ // the link target. Permissions are always set to 0644.
+ // NOTE(review): the results of archive_entry_new(), archive_write_header()
+ // and archive_write_data() are not checked, so write failures go unnoticed.
+ void WriteEntry(
+ const TString& path,
+ const TString& data,
+ const bool symlink = false)
+ {
+ struct archive_entry* const entry = archive_entry_new();
+ // Write header.
+ archive_entry_set_pathname(entry, path.c_str());
+ archive_entry_set_size(entry, data.size());
+ archive_entry_set_filetype(entry, symlink ? AE_IFLNK : AE_IFREG);
+ archive_entry_set_perm(entry, 0644);
+ if (symlink) {
+ archive_entry_set_symlink(entry, data.c_str());
+ }
+ archive_write_header(Archive_, entry);
+ // Write data.
+ // If the entry is a symlink, its size becomes zero, so no data is written.
+ if (archive_entry_size(entry) > 0) {
+ archive_write_data(Archive_, data.data(), data.size());
+ }
+ archive_entry_free(entry);
+ }
+
+private:
+ THolder<NJson::TJsonValue> Actions_;
+ THolder<NJson::TJsonValue> Meta_;
+ THolder<NJson::TJsonValue> Revprops_;
+ struct archive* Archive_;
+};
+
+TWriter::TWriter(const TFsPath& path)
+ : Impl_(new TImpl(path))
+{
+}
+
+TWriter::~TWriter()
+{ }
+
+void TWriter::Finish() {
+ Impl_->Finish();
+}
+
+void TWriter::SetBaseSvnRevision(ui64 revision) {
+ Impl_->SetBaseSvnRevision(revision);
+}
+
+void TWriter::AddRevprop(const TString& prop, const TString& value) {
+ Impl_->AddRevprop(prop, value);
+}
+
+void TWriter::Copy(const TString& path, const TOrigin& origin) {
+ Impl_->Copy(path, origin);
+}
+
+void TWriter::MkDir(const TString& path) {
+ Impl_->MkDir(path);
+}
+
+void TWriter::RemoveFile(const TString& path) {
+ Impl_->RemoveFile(path);
+}
+
+void TWriter::RemoveTree(const TString& path) {
+ Impl_->RemoveTree(path);
+}
+
+void TWriter::StoreFile(
+ const TString& path,
+ const TString& data,
+ const bool execute,
+ const bool symlink,
+ const TMaybe<bool> binaryHint,
+ const TMaybe<bool> encrypted)
+{
+ Impl_->StoreFile(path, data, execute, symlink, binaryHint, encrypted);
+}
+
+} // namespace NZipatch
+
diff --git a/library/cpp/zipatch/writer.h b/library/cpp/zipatch/writer.h
new file mode 100644
index 0000000000..75cbe49777
--- /dev/null
+++ b/library/cpp/zipatch/writer.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include <util/folder/path.h>
+#include <util/generic/ptr.h>
+#include <util/generic/maybe.h>
+
+namespace NZipatch {
+
+class TWriter {
+public:
+ struct TOrigin {
+ TString Path;
+ ui64 Revision;
+
+ inline TOrigin(const TString& path, const ui64 revision)
+ : Path(path)
+ , Revision(revision)
+ { }
+ };
+
+ TWriter(const TFsPath& path);
+ ~TWriter();
+
+ void Finish();
+
+ void SetBaseSvnRevision(ui64 revision);
+
+ void AddRevprop(const TString& prop, const TString& value);
+
+ void Copy(const TString& path, const TOrigin& origin);
+
+ void MkDir(const TString& path);
+
+ void RemoveFile(const TString& path);
+
+ void RemoveTree(const TString& path);
+
+ void StoreFile(const TString& path,
+ const TString& data,
+ const bool execute,
+ const bool symlink,
+ const TMaybe<bool> binaryHint = Nothing(),
+ const TMaybe<bool> encrypted = Nothing());
+
+private:
+ class TImpl;
+ THolder<TImpl> Impl_;
+};
+
+} // namespace NZipatch
+
diff --git a/library/cpp/zipatch/ya.make b/library/cpp/zipatch/ya.make
new file mode 100644
index 0000000000..f8fd6006b2
--- /dev/null
+++ b/library/cpp/zipatch/ya.make
@@ -0,0 +1,16 @@
+LIBRARY()
+
+SRCS(
+ reader.cpp
+ writer.cpp
+)
+
+PEERDIR(
+ contrib/libs/libarchive
+ library/cpp/json
+)
+
+GENERATE_ENUM_SERIALIZATION(reader.h)
+
+END()
+