aboutsummaryrefslogtreecommitdiffstats
path: root/library
diff options
context:
space:
mode:
authorvvvv <vvvv@ydb.tech>2023-08-30 20:49:53 +0300
committervvvv <vvvv@ydb.tech>2023-08-30 21:17:44 +0300
commitf154e22342f327342effe873b0a00ad80c975e76 (patch)
treefff231496c10fbfcff025ed953b512bf2a82d7c0 /library
parent4ebafdd49d8b0706c5af76ef7c2d0b3b498d0310 (diff)
downloadydb-f154e22342f327342effe873b0a00ad80c975e76.tar.gz
Moved udf_test and refactored test_framework
Локально упавший тест выполняется %% vvvv@mr-nvme-testing-08:~/repo/arcadia/statbox/nile/tests/yql/py2/part_2$ arc checkout move_udf_test_and_refactor_tf Switched to branch 'move_udf_test_and_refactor_tf' vvvv@mr-nvme-testing-08:~/repo/arcadia/statbox/nile/tests/yql/py2/part_2$ ya make -tA -F '*test_unchanged_table*' Warn[-WPluginErr]: in $B/statbox/nile/tests/yql/py2/part_2/libpy2-part_2.so: Requirement cpu is redefined 2 -> 4 Warn[-WPluginErr]: in $B/statbox/nile/tests/yql/py2/part_2/libpy2-part_2.so: Requirement ram is redefined 16 -> 9 Number of suites skipped by name: 2, by filter *test_unchanged_table* Total 1 suite: 1 - GOOD Total 4 tests: 4 - GOOD Ok %% судя по ошибке он flaky
Diffstat (limited to 'library')
-rw-r--r--library/c/cyson/cyson.cpp447
-rw-r--r--library/c/cyson/cyson.h164
-rw-r--r--library/c/cyson/ya.make11
-rw-r--r--library/python/cyson/README.md99
-rw-r--r--library/python/cyson/cyson/__init__.py17
-rw-r--r--library/python/cyson/cyson/_cyson.pyx2148
-rw-r--r--library/python/cyson/cyson/helpers.cpp179
-rw-r--r--library/python/cyson/cyson/helpers.h48
-rw-r--r--library/python/cyson/cyson/libcyson.pxd116
-rw-r--r--library/python/cyson/cyson/unsigned_long.cpp292
-rw-r--r--library/python/cyson/cyson/unsigned_long.h27
-rw-r--r--library/python/cyson/pymodule/ya.make21
-rw-r--r--library/python/cyson/ut/test_control_attributes.py258
-rw-r--r--library/python/cyson/ut/test_input_stream.py82
-rw-r--r--library/python/cyson/ut/test_py_reader_writer.py70
-rw-r--r--library/python/cyson/ut/test_reader_writer.py251
-rw-r--r--library/python/cyson/ut/test_unsigned_long.py222
-rw-r--r--library/python/cyson/ut/ya.make21
-rw-r--r--library/python/cyson/ya.make28
19 files changed, 4501 insertions, 0 deletions
diff --git a/library/c/cyson/cyson.cpp b/library/c/cyson/cyson.cpp
new file mode 100644
index 0000000000..f8bff01a96
--- /dev/null
+++ b/library/c/cyson/cyson.cpp
@@ -0,0 +1,447 @@
+// Export visible API
+
+#include "cyson.h"
+
+#include <library/cpp/yson_pull/yson.h>
+#include <library/cpp/yson_pull/detail/reader.h>
+#include <library/cpp/yson_pull/detail/writer.h>
+#include <library/cpp/yson_pull/detail/input/stream.h>
+#include <library/cpp/yson_pull/detail/input/stdio_file.h>
+#include <library/cpp/yson_pull/detail/output/buffered.h>
+#include <library/cpp/yson_pull/detail/output/stream.h>
+#include <library/cpp/yson_pull/detail/output/stdio_file.h>
+
+#include <util/generic/ptr.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+
+namespace {
+ template <typename T>
+ void safe_assign_string(TString& dest, T&& value) noexcept {
+ try {
+ dest = std::forward<T>(value);
+ } catch (...) {
+ // Suppress exception
+ }
+ }
+
+} // anonymous namespace
+
+// State behind the opaque `yson_reader` handle of the C API: the pull parser
+// and the text of the most recent failure.
+struct yson_reader {
+    NYsonPull::NDetail::gen_reader_impl<false> impl;
+    TString error_message;
+
+    yson_reader(NYsonPull::NInput::IStream* stream, NYsonPull::EStreamType mode)
+        : impl(*stream, mode)
+    {
+        // Pre-size the message buffer at construction time.
+        error_message.reserve(64);
+    }
+
+    // Advances the parser and returns the type of the new event. Any exception
+    // is turned into YSON_EVENT_ERROR, with its text saved in error_message.
+    yson_event_type safe_get_next_event() noexcept {
+        try {
+            return static_cast<yson_event_type>(impl.next_event().Type());
+        } catch (...) {
+            safe_assign_string(error_message, CurrentExceptionMessage());
+        }
+        return YSON_EVENT_ERROR;
+    }
+};
+
+// State behind the opaque `yson_writer` handle of the C API: the owned
+// consumer and the text of the most recent failure.
+struct yson_writer {
+    THolder<NYsonPull::IConsumer> consumer;
+    TString error_message;
+
+    yson_writer(THolder<NYsonPull::IConsumer> consumer_)
+        : consumer{std::move(consumer_)} {
+        error_message.reserve(64);
+    }
+
+    // Runs `action(*consumer)` and maps the outcome to a result code:
+    //  - no exception      -> YSON_WRITER_RESULT_OK
+    //  - TBadOutput thrown -> YSON_WRITER_RESULT_BAD_STREAM
+    //  - anything else     -> YSON_WRITER_RESULT_ERROR
+    // On failure the exception text is stored in error_message.
+    template <typename TAction>
+    yson_writer_result safe_write(TAction&& action) noexcept {
+        yson_writer_result status = YSON_WRITER_RESULT_OK;
+        try {
+            action(*consumer);
+        } catch (const NYsonPull::NException::TBadOutput& failure) {
+            safe_assign_string(error_message, failure.what());
+            status = YSON_WRITER_RESULT_BAD_STREAM;
+        } catch (...) {
+            safe_assign_string(error_message, CurrentExceptionMessage());
+            status = YSON_WRITER_RESULT_ERROR;
+        }
+        return status;
+    }
+};
+
+namespace {
+ // Thrown by the callback-based stream adaptors when the user callback
+ // reports a failure.
+ struct callback_error: std::exception {
+     const char* what() const noexcept override {
+         return "User callback returned error result code";
+     }
+ };
+
+ // Input stream that obtains its data from a user-supplied C callback.
+ class c_yson_input_stream: public NYsonPull::NInput::IStream {
+     void* ctx_;
+     yson_input_stream_func callback_;
+
+ public:
+     c_yson_input_stream(void* ctx, yson_input_stream_func callback)
+         : ctx_{ctx}
+         , callback_{callback} {
+     }
+
+ protected:
+     // Asks the callback for the next chunk.
+     //  - EOF            -> result::at_end
+     //  - OK             -> the buffer is pointed at the returned chunk
+     //  - any other code -> callback_error is thrown
+     result do_fill_buffer() override {
+         const char* chunk;
+         size_t chunk_size;
+         const auto status = callback_(ctx_, &chunk, &chunk_size);
+         if (status == YSON_INPUT_STREAM_RESULT_EOF) {
+             return result::at_end;
+         }
+         if (status != YSON_INPUT_STREAM_RESULT_OK) {
+             throw callback_error();
+         }
+         const auto* begin = reinterpret_cast<const ui8*>(chunk);
+         buffer().reset(begin, begin + chunk_size);
+         return result::have_more_data;
+     }
+ };
+
+ // Buffered output stream that hands its data to a user-supplied C callback.
+ class c_yson_output_stream: public NYsonPull::NDetail::NOutput::TBuffered<c_yson_output_stream> {
+     using base_type = NYsonPull::NDetail::NOutput::TBuffered<c_yson_output_stream>;
+
+     void* ctx_;
+     yson_output_stream_func callback_;
+
+ public:
+     c_yson_output_stream(void* ctx, yson_output_stream_func callback, size_t buffer_size)
+         : base_type(buffer_size)
+         , ctx_{ctx}
+         , callback_{callback} {
+     }
+
+     // Passes `data` to the callback; any result other than
+     // YSON_OUTPUT_STREAM_RESULT_OK is reported by throwing callback_error.
+     void write(TStringBuf data) {
+         if (callback_(ctx_, data.data(), data.size()) != YSON_OUTPUT_STREAM_RESULT_OK) {
+             throw callback_error();
+         }
+     }
+ };
+
+ // Type marshalling
+
+ // Exposes the string payload of a scalar through the C `yson_string` type.
+ // NOTE(review): relies on the payload being layout-compatible with
+ // yson_string (pointer + length) -- confirm against TScalar.
+ const yson_string* to_yson_string(const NYsonPull::TScalar& value) {
+     assert(value.Type() == NYsonPull::EScalarType::String);
+     return reinterpret_cast<const yson_string*>(&value.AsUnsafeValue().AsString);
+ }
+
+ // Conversions between the opaque C handles and the C++ stream interfaces.
+ // Each one only reinterprets the pointer value.
+
+ yson_input_stream* to_yson_input_stream(NYsonPull::NInput::IStream* stream) {
+     using handle_type = yson_input_stream*;
+     return reinterpret_cast<handle_type>(stream);
+ }
+
+ NYsonPull::NInput::IStream* from_yson_input_stream(yson_input_stream* handle) {
+     using stream_type = NYsonPull::NInput::IStream*;
+     return reinterpret_cast<stream_type>(handle);
+ }
+
+ yson_output_stream* to_yson_output_stream(NYsonPull::NOutput::IStream* stream) {
+     using handle_type = yson_output_stream*;
+     return reinterpret_cast<handle_type>(stream);
+ }
+
+ NYsonPull::NOutput::IStream* from_yson_output_stream(yson_output_stream* handle) {
+     using stream_type = NYsonPull::NOutput::IStream*;
+     return reinterpret_cast<stream_type>(handle);
+ }
+
+ // Exception-handling new/delete wrappers
+
+ // Constructs a heap-allocated T from `args`. Returns nullptr instead of
+ // propagating any exception (allocation failure or a throwing constructor).
+ template <typename T, typename... TArgs>
+ T* safe_new(TArgs&&... args) noexcept {
+     T* instance = nullptr;
+     try {
+         instance = new T(std::forward<TArgs>(args)...);
+     } catch (...) {
+         // Construction failed: report it as nullptr.
+     }
+     return instance;
+ }
+
+ template <typename T>
+ void safe_delete(T* ptr) noexcept {
+ assert(ptr != nullptr);
+ try {
+ delete ptr;
+ } catch (...) {
+ // Suppress destructor exceptions
+ }
+ }
+
+ // Builds a writer implementation T on top of `stream` (extra constructor
+ // arguments are forwarded) and wraps it in a heap-allocated yson_writer.
+ // Returns nullptr if any step throws.
+ template <typename T, typename... TArgs>
+ yson_writer* safe_new_writer(yson_output_stream* stream, TArgs&&... args) noexcept {
+     try {
+         auto& output = *from_yson_output_stream(stream);
+         return new yson_writer(MakeHolder<T>(output, std::forward<TArgs>(args)...));
+     } catch (...) {
+         return nullptr;
+     }
+ }
+
+} // anonymous namespace
+
+extern "C" {
+// Input stream
+
+// Each constructor below returns NULL when the underlying object cannot be
+// created (safe_new converts exceptions into a null pointer).
+
+// Input stream over the memory range [ptr, ptr + length).
+yson_input_stream* yson_input_stream_from_string(const char* ptr, size_t length) {
+    TStringBuf view(ptr, length);
+    return to_yson_input_stream(
+        safe_new<NYsonPull::NDetail::NInput::TOwned<TMemoryInput>>(view));
+}
+
+// Input stream reading from a stdio FILE*.
+yson_input_stream* yson_input_stream_from_file(FILE* file, size_t buffer_size) {
+    return to_yson_input_stream(
+        safe_new<NYsonPull::NDetail::NInput::TStdioFile>(file, buffer_size));
+}
+
+// Input stream reading from a file descriptor.
+yson_input_stream* yson_input_stream_from_fd(int fd, size_t buffer_size) {
+    return to_yson_input_stream(
+        safe_new<NYsonPull::NDetail::NInput::TFHandle>(fd, buffer_size));
+}
+
+// Input stream pulling chunks from `callback(ctx, ...)`.
+yson_input_stream* yson_input_stream_new(void* ctx, yson_input_stream_func callback) {
+    return to_yson_input_stream(safe_new<c_yson_input_stream>(ctx, callback));
+}
+
+// Destroys a stream created by one of the functions above; `stream` must not
+// be NULL.
+void yson_input_stream_delete(yson_input_stream* stream) {
+    assert(stream != nullptr);
+    auto* impl = from_yson_input_stream(stream);
+    safe_delete(impl);
+}
+
+// Reader
+
+// Creates a reader over `stream` (must not be NULL). Returns NULL if
+// construction fails.
+yson_reader* yson_reader_new(yson_input_stream* stream, yson_stream_type mode) {
+    assert(stream != nullptr);
+    auto* input = from_yson_input_stream(stream);
+    const auto stream_type = static_cast<NYsonPull::EStreamType>(mode);
+    return safe_new<yson_reader>(input, stream_type);
+}
+
+// Destroys a reader; `reader` must not be NULL.
+void yson_reader_delete(yson_reader* reader) {
+    assert(reader != nullptr);
+    safe_delete(reader);
+}
+
+// Advances to the next event; YSON_EVENT_ERROR signals a failure whose text
+// is available via yson_reader_get_error_message().
+yson_event_type yson_reader_get_next_event(yson_reader* reader) {
+    assert(reader != nullptr);
+    const yson_event_type event_type = reader->safe_get_next_event();
+    return event_type;
+}
+
+// Returns the stored error text as a NUL-terminated string owned by the reader.
+const char* yson_reader_get_error_message(yson_reader* reader) {
+    assert(reader != nullptr);
+    const TString& message = reader->error_message;
+    return message.c_str();
+}
+
+// The accessors below inspect the scalar carried by the reader's last event.
+// NOTE(review): presumably only valid when the last event is a scalar of the
+// matching type -- confirm against the yson_pull event API.
+
+yson_scalar_type yson_reader_get_scalar_type(yson_reader* reader) {
+    assert(reader != nullptr);
+    return static_cast<yson_scalar_type>(reader->impl.last_event().AsScalar().Type());
+}
+
+int yson_reader_get_boolean(yson_reader* reader) {
+    assert(reader != nullptr);
+    return static_cast<int>(reader->impl.last_event().AsScalar().AsBoolean());
+}
+
+i64 yson_reader_get_int64(yson_reader* reader) {
+    assert(reader != nullptr);
+    return reader->impl.last_event().AsScalar().AsInt64();
+}
+
+ui64 yson_reader_get_uint64(yson_reader* reader) {
+    assert(reader != nullptr);
+    return reader->impl.last_event().AsScalar().AsUInt64();
+}
+
+double yson_reader_get_float64(yson_reader* reader) {
+    assert(reader != nullptr);
+    return reader->impl.last_event().AsScalar().AsFloat64();
+}
+
+const yson_string* yson_reader_get_string(yson_reader* reader) {
+    assert(reader != nullptr);
+    const auto& scalar = reader->impl.last_event().AsScalar();
+    return to_yson_string(scalar);
+}
+
+// Output stream
+
+// Each constructor below returns NULL when the underlying object cannot be
+// created (safe_new converts exceptions into a null pointer).
+
+// Output stream writing to a stdio FILE*.
+yson_output_stream* yson_output_stream_from_file(FILE* file, size_t buffer_size) {
+    return to_yson_output_stream(
+        safe_new<NYsonPull::NDetail::NOutput::TStdioFile>(file, buffer_size));
+}
+
+// Output stream writing to a file descriptor.
+yson_output_stream* yson_output_stream_from_fd(int fd, size_t buffer_size) {
+    return to_yson_output_stream(
+        safe_new<NYsonPull::NDetail::NOutput::TFHandle>(fd, buffer_size));
+}
+
+// Output stream passing its data to `callback(ctx, ...)`.
+yson_output_stream* yson_output_stream_new(void* ctx, yson_output_stream_func callback, size_t buffer_size) {
+    return to_yson_output_stream(
+        safe_new<c_yson_output_stream>(ctx, callback, buffer_size));
+}
+
+// Destroys a stream created by one of the functions above; `stream` must not
+// be NULL.
+void yson_output_stream_delete(yson_output_stream* stream) {
+    assert(stream != nullptr);
+    auto* impl = from_yson_output_stream(stream);
+    safe_delete(impl);
+}
+
+// Writer
+
+// Writer constructors: `stream` must not be NULL; NULL is returned if the
+// writer cannot be created.
+
+yson_writer* yson_writer_new_binary(yson_output_stream* stream, yson_stream_type mode) {
+    assert(stream != nullptr);
+    const auto stream_type = static_cast<NYsonPull::EStreamType>(mode);
+    return safe_new_writer<NYsonPull::NDetail::TBinaryWriterImpl>(stream, stream_type);
+}
+
+yson_writer* yson_writer_new_text(yson_output_stream* stream, yson_stream_type mode) {
+    assert(stream != nullptr);
+    const auto stream_type = static_cast<NYsonPull::EStreamType>(mode);
+    return safe_new_writer<NYsonPull::NDetail::TTextWriterImpl>(stream, stream_type);
+}
+
+yson_writer* yson_writer_new_pretty_text(yson_output_stream* stream, yson_stream_type mode, size_t indent) {
+    assert(stream != nullptr);
+    const auto stream_type = static_cast<NYsonPull::EStreamType>(mode);
+    return safe_new_writer<NYsonPull::NDetail::TPrettyWriterImpl>(stream, stream_type, indent);
+}
+
+// Destroys a writer; `writer` must not be NULL.
+void yson_writer_delete(yson_writer* writer) {
+    assert(writer != nullptr);
+    safe_delete(writer);
+}
+
+// Returns the stored error text as a NUL-terminated string owned by the writer.
+const char* yson_writer_get_error_message(yson_writer* writer) {
+    assert(writer != nullptr);
+    const TString& message = writer->error_message;
+    return message.c_str();
+}
+
+// Structural events. Each function forwards one event to the writer's
+// consumer through safe_write(), which converts exceptions to result codes.
+
+yson_writer_result yson_writer_write_begin_stream(yson_writer* writer) {
+    assert(writer != nullptr);
+    return writer->safe_write([](NYsonPull::IConsumer& sink) {
+        sink.OnBeginStream();
+    });
+}
+
+yson_writer_result yson_writer_write_end_stream(yson_writer* writer) {
+    assert(writer != nullptr);
+    return writer->safe_write([](NYsonPull::IConsumer& sink) {
+        sink.OnEndStream();
+    });
+}
+
+yson_writer_result yson_writer_write_begin_list(yson_writer* writer) {
+    assert(writer != nullptr);
+    return writer->safe_write([](NYsonPull::IConsumer& sink) {
+        sink.OnBeginList();
+    });
+}
+
+yson_writer_result yson_writer_write_end_list(yson_writer* writer) {
+    assert(writer != nullptr);
+    return writer->safe_write([](NYsonPull::IConsumer& sink) {
+        sink.OnEndList();
+    });
+}
+
+yson_writer_result yson_writer_write_begin_map(yson_writer* writer) {
+    assert(writer != nullptr);
+    return writer->safe_write([](NYsonPull::IConsumer& sink) {
+        sink.OnBeginMap();
+    });
+}
+
+yson_writer_result yson_writer_write_end_map(yson_writer* writer) {
+    assert(writer != nullptr);
+    return writer->safe_write([](NYsonPull::IConsumer& sink) {
+        sink.OnEndMap();
+    });
+}
+
+yson_writer_result yson_writer_write_begin_attributes(yson_writer* writer) {
+    assert(writer != nullptr);
+    return writer->safe_write([](NYsonPull::IConsumer& sink) {
+        sink.OnBeginAttributes();
+    });
+}
+
+yson_writer_result yson_writer_write_end_attributes(yson_writer* writer) {
+    assert(writer != nullptr);
+    return writer->safe_write([](NYsonPull::IConsumer& sink) {
+        sink.OnEndAttributes();
+    });
+}
+
+yson_writer_result yson_writer_write_entity(yson_writer* writer) {
+    assert(writer != nullptr);
+    return writer->safe_write([](NYsonPull::IConsumer& sink) {
+        sink.OnEntity();
+    });
+}
+
+// Key and scalar events. Arguments are captured by value and forwarded to the
+// writer's consumer through safe_write().
+
+yson_writer_result yson_writer_write_key(yson_writer* writer, const char* ptr, size_t length) {
+    assert(writer != nullptr);
+    return writer->safe_write([ptr, length](NYsonPull::IConsumer& sink) {
+        sink.OnKey({ptr, length});
+    });
+}
+
+yson_writer_result yson_writer_write_string(yson_writer* writer, const char* ptr, size_t length) {
+    assert(writer != nullptr);
+    return writer->safe_write([ptr, length](NYsonPull::IConsumer& sink) {
+        sink.OnScalarString({ptr, length});
+    });
+}
+
+yson_writer_result yson_writer_write_int64(yson_writer* writer, i64 value) {
+    assert(writer != nullptr);
+    return writer->safe_write([value](NYsonPull::IConsumer& sink) {
+        sink.OnScalarInt64(value);
+    });
+}
+
+yson_writer_result yson_writer_write_uint64(yson_writer* writer, ui64 value) {
+    assert(writer != nullptr);
+    return writer->safe_write([value](NYsonPull::IConsumer& sink) {
+        sink.OnScalarUInt64(value);
+    });
+}
+
+// Any non-zero `value` is written as true.
+yson_writer_result yson_writer_write_boolean(yson_writer* writer, int value) {
+    assert(writer != nullptr);
+    const bool flag = value != 0;
+    return writer->safe_write([flag](NYsonPull::IConsumer& sink) {
+        sink.OnScalarBoolean(flag);
+    });
+}
+
+yson_writer_result yson_writer_write_float64(yson_writer* writer, double value) {
+    assert(writer != nullptr);
+    return writer->safe_write([value](NYsonPull::IConsumer& sink) {
+        sink.OnScalarFloat64(value);
+    });
+}
+
+} // extern "C"
diff --git a/library/c/cyson/cyson.h b/library/c/cyson/cyson.h
new file mode 100644
index 0000000000..1151c7fd89
--- /dev/null
+++ b/library/c/cyson/cyson.h
@@ -0,0 +1,164 @@
+#pragma once
+
+#include <library/cpp/yson_pull/cyson_enums.h>
+
+#include <util/system/types.h>
+
+#include <stddef.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Pointer/length pair describing string data. */
+typedef struct yson_string {
+    const char* ptr;
+    size_t length;
+} yson_string;
+
+/* Input callback: stores the next chunk in *ptr / *length and returns OK,
+ * or returns EOF at end of data; any other result is treated as an error. */
+typedef yson_input_stream_result (*yson_input_stream_func)(void* ctx, const char** ptr, size_t* length);
+
+/* Output callback: consumes `length` bytes at `ptr` and returns OK;
+ * any other result is treated as an error. */
+typedef yson_output_stream_result (*yson_output_stream_func)(void* ctx, const char* ptr, size_t length);
+
+/* Abstract types */
+
+typedef struct yson_input_stream yson_input_stream;
+typedef struct yson_output_stream yson_output_stream;
+
+typedef struct yson_reader yson_reader;
+typedef struct yson_writer yson_writer;
+
+/* Input stream */
+
+/* Constructors return NULL on failure. */
+
+yson_input_stream* yson_input_stream_from_string(const char* ptr, size_t length);
+
+yson_input_stream* yson_input_stream_from_file(FILE* file, size_t buffer_size);
+
+yson_input_stream* yson_input_stream_from_fd(int fd, size_t buffer_size);
+
+yson_input_stream* yson_input_stream_new(void* ctx, yson_input_stream_func callback);
+
+/* `stream` must not be NULL. */
+void yson_input_stream_delete(yson_input_stream* stream);
+
+/* Output stream */
+
+/* Constructors return NULL on failure. */
+
+yson_output_stream* yson_output_stream_from_file(FILE* file, size_t buffer_size);
+
+yson_output_stream* yson_output_stream_from_fd(int fd, size_t buffer_size);
+
+yson_output_stream* yson_output_stream_new(void* ctx, yson_output_stream_func callback, size_t buffer_size);
+
+/* `stream` must not be NULL. */
+void yson_output_stream_delete(yson_output_stream* stream);
+
+/* Reader */
+
+/* Returns NULL on failure; `stream` must not be NULL. */
+yson_reader* yson_reader_new(yson_input_stream* stream, yson_stream_type mode);
+
+void yson_reader_delete(yson_reader* reader);
+
+/* Returns YSON_EVENT_ERROR on failure; see yson_reader_get_error_message(). */
+yson_event_type yson_reader_get_next_event(yson_reader* reader);
+
+const char* yson_reader_get_error_message(yson_reader* reader);
+
+/* Accessors for the scalar carried by the most recent event. */
+
+yson_scalar_type yson_reader_get_scalar_type(yson_reader* reader);
+
+int yson_reader_get_boolean(yson_reader* reader);
+
+i64 yson_reader_get_int64(yson_reader* reader);
+
+ui64 yson_reader_get_uint64(yson_reader* reader);
+
+double yson_reader_get_float64(yson_reader* reader);
+
+const yson_string* yson_reader_get_string(yson_reader* reader);
+
+/* Writer */
+
+/* Constructors return NULL on failure; `stream` must not be NULL. */
+
+yson_writer* yson_writer_new_binary(yson_output_stream* stream, yson_stream_type mode);
+
+yson_writer* yson_writer_new_text(yson_output_stream* stream, yson_stream_type mode);
+
+yson_writer* yson_writer_new_pretty_text(yson_output_stream* stream, yson_stream_type mode, size_t indent);
+
+void yson_writer_delete(yson_writer* writer);
+
+/* Text of the most recent failure reported by a yson_writer_write_* call. */
+const char* yson_writer_get_error_message(yson_writer* writer);
+
+/* Write operations return YSON_WRITER_RESULT_OK on success,
+ * YSON_WRITER_RESULT_BAD_STREAM for output stream failures and
+ * YSON_WRITER_RESULT_ERROR for any other failure. */
+
+yson_writer_result yson_writer_write_begin_stream(yson_writer* writer);
+
+yson_writer_result yson_writer_write_end_stream(yson_writer* writer);
+
+yson_writer_result yson_writer_write_begin_list(yson_writer* writer);
+
+yson_writer_result yson_writer_write_end_list(yson_writer* writer);
+
+yson_writer_result yson_writer_write_begin_map(yson_writer* writer);
+
+yson_writer_result yson_writer_write_end_map(yson_writer* writer);
+
+yson_writer_result yson_writer_write_begin_attributes(yson_writer* writer);
+
+yson_writer_result yson_writer_write_end_attributes(yson_writer* writer);
+
+yson_writer_result yson_writer_write_entity(yson_writer* writer);
+
+yson_writer_result yson_writer_write_key(yson_writer* writer, const char* ptr, size_t length);
+
+yson_writer_result yson_writer_write_string(yson_writer* writer, const char* ptr, size_t length);
+
+yson_writer_result yson_writer_write_int64(yson_writer* writer, i64 value);
+
+yson_writer_result yson_writer_write_uint64(yson_writer* writer, ui64 value);
+
+yson_writer_result yson_writer_write_boolean(yson_writer* writer, int value);
+
+yson_writer_result yson_writer_write_float64(yson_writer* writer, double value);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
diff --git a/library/c/cyson/ya.make b/library/c/cyson/ya.make
new file mode 100644
index 0000000000..c42965a094
--- /dev/null
+++ b/library/c/cyson/ya.make
@@ -0,0 +1,11 @@
+LIBRARY()
+
+PEERDIR(
+ library/cpp/yson_pull
+)
+
+SRCS(
+ cyson.cpp
+)
+
+END()
diff --git a/library/python/cyson/README.md b/library/python/cyson/README.md
new file mode 100644
index 0000000000..5cc67f0d2c
--- /dev/null
+++ b/library/python/cyson/README.md
@@ -0,0 +1,99 @@
+# Python-биндинги к libyson
+
+## Высокоуровневый интерфейс
+
+Функции `dumps`, `loads` для преобразования строк:
+```python
+>>> from cyson import dumps, loads
+
+>>> print dumps(1234)
+1234
+>>> print dumps("Hello world! Привет!")
+"Hello world! Привет!"
+>>> print dumps([1, "foo", None, {'aaa': 'bbb'}])
+[1; "foo"; #; {"aaa" = "bbb"}]
+>>> dumps([1, "foo", None, {'aaa': 'bbb'}], format='binary')
+'[\x02\x02;\x01\x06foo;#;{\x01\x06aaa=\x01\x06bbb}]'
+>>> print dumps([1, "foo", None, {'aaa': 'bbb'}], format='pretty')
+[
+ 1;
+ "foo";
+ #;
+ {
+ "aaa" = "bbb"
+ }
+]
+
+>>> loads('1234')
+1234
+>>> loads('3.14')
+3.14
+>>> loads('[1; "foo"; #; {"aaa" = "bbb"}]')
+[1, 'foo', None, {'aaa': 'bbb'}]
+>>> loads('[\x02\x02;\x01\x06foo;#;{\x01\x06aaa=\x01\x06bbb}]')
+[1, 'foo', None, {'aaa': 'bbb'}]
+```
+
+Функции `list_fragments`, `map_fragments` для удобного чтения из входного
+потока.
+```python
+import cyson
+
+input = cyson.list_fragments(
+ cyson.InputStream.from_fd(STDIN_FILENO),
+ process_table_index=True,
+)
+
+for record in input:
+ ...
+```
+
+## Низкоуровневый интерфейс
+
+### Адаптеры потоков ввода-вывода
+
+Классы `InputStream`, `OutputStream` не предоставляют никакой функциональности
+сами по себе, но позволяют подключить поток ввода/вывода к Reader или Writer.
+
+Конструкторы классов - статические методы с именами `from_*`:
+
+```python
+input = cyson.InputStream.from_fd(0)
+input = cyson.InputStream.from_string("...")
+input = cyson.InputStream.from_iter(iter_chunks)
+
+output = cyson.OutputStream.from_fd(1)
+output = cyson.OutputStream.from_file(stringio_file)
+```
+
+### Reader/Writer
+
+`Reader` - самый быстрый метод десериализации, и в целом позволяет получать
+объекты привычных и ожидаемых типов. При отсутствии атрибутов, порождает
+встроенные типы, иначе - `Yson*`. Не позволяет различать `list`/`tuple`, или
+получать на входе `set`.
+
+`Writer` позволяет выводить низкоуровневые элементы потока, или сериализовать
+объекты. Для сериализации объектов следует использовать метод `write()`.
+
+### StrictReader
+
+`StrictReader` отличается от Reader тем, что всегда создает объекты типа
+`Yson*`, независимо от наличия атрибутов.
+
+Никакое специальное поведение при записи в таком случае не требуется, так что
+вместе с ним можно использовать обычный `Writer`.
+
+### PyReader/PyWriter
+
+Пара для сериализации-десериализации произвольных python-типов. Тип кодируется
+атрибутом `py` у значения.
+
+Поддержка дополнительных типов добавляется с помощью декораторов
+`pywriter_handler`, `pyreader_scalar_handler`, `pyreader_list_handler`,
+`pyreader_map_handler`.
+
+### UnicodeReader
+
+Декодирует все строки в юникод. Удобен при работе с `python3`, но может
+ухудшить производительность.
diff --git a/library/python/cyson/cyson/__init__.py b/library/python/cyson/cyson/__init__.py
new file mode 100644
index 0000000000..5e0cac2241
--- /dev/null
+++ b/library/python/cyson/cyson/__init__.py
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import print_function, absolute_import, division
+
+from ._cyson import * # noqa
+
+__all__ = [
+ 'loads', 'dumps', 'dumps_into',
+ 'list_fragments', 'key_switched_list_fragments', 'map_fragments',
+ 'InputStream', 'OutputStream',
+ 'Reader', 'Writer',
+ 'PyReader', 'PyWriter',
+ 'StrictReader',
+ 'YsonEntity', 'YsonString', 'YsonInt64', 'YsonUInt64',
+ 'YsonFloat64', 'YsonBoolean', 'YsonList', 'YsonMap',
+ 'UInt', 'UnicodeReader',
+]
diff --git a/library/python/cyson/cyson/_cyson.pyx b/library/python/cyson/cyson/_cyson.pyx
new file mode 100644
index 0000000000..ff538b121f
--- /dev/null
+++ b/library/python/cyson/cyson/_cyson.pyx
@@ -0,0 +1,2148 @@
+# -*- coding: utf-8 -*-
+#cython: embedsignature=True
+#cython: infer_types=True
+
+cimport cython
+cimport libc.stdio
+cimport libc.string
+cimport libcyson as C
+
+from libc.stdint cimport uint64_t, int64_t
+from libc.stddef cimport size_t
+
+cimport cpython.pycapsule
+
+from cpython.bytearray cimport (
+ PyByteArray_FromStringAndSize,
+ PyByteArray_Resize,
+ PyByteArray_AS_STRING,
+ PyByteArray_GET_SIZE,
+)
+from cpython.bytes cimport (
+ PyBytes_Check,
+ PyBytes_FromStringAndSize,
+ PyBytes_AS_STRING,
+ PyBytes_GET_SIZE,
+)
+from cpython.dict cimport PyDict_Next, PyDict_Copy, PyDict_Check
+from cpython.int cimport PyInt_FromLong
+from cpython.long cimport PyLong_AsUnsignedLong
+from cpython.list cimport PyList_GET_SIZE, PyList_GET_ITEM
+from cpython.object cimport (
+ PyObject, PyTypeObject,
+ PyObject_GetIter, PyObject_GetItem, PyObject_CallMethod,
+)
+from cpython.ref cimport Py_DECREF
+from cpython.tuple cimport PyTuple_GET_SIZE, PyTuple_GET_ITEM
+from cpython.unicode cimport (
+ PyUnicode_AsUTF8String, PyUnicode_DecodeUTF8, PyUnicode_Check
+)
+
+
+cdef extern from "Python.h":
+ # Actually returns a new reference (if an iterator isn't exhausted), but
+ # returns NULL without setting an exception for exhausted iterator (so
+ # return type isn't `object`). This declaration requires manual `Py_DECREF`
+ # calls (cython thinks the return value is a borrowed reference).
+ PyObject* PyIter_Next(object) except? NULL
+
+
+cdef extern from "library/python/cyson/cyson/helpers.h":
+ libc.stdio.FILE* PyFile_AsFile(object) except NULL
+ bint PyFile_CheckExact(object)
+ bint GenericCheckBuffer(object)
+
+
+cdef extern from "library/python/cyson/cyson/helpers.h" namespace "NCYson":
+ bint PY3
+ bint PY2
+
+ object ConvertPyStringToPyNativeString(object)
+ bytes ConvertPyStringToPyBytes(object)
+ object GetCharBufferAndOwner(object, const char**, size_t*)
+ bytes ConvertPyLongToPyBytes(object)
+
+
+cdef extern from "library/python/cyson/cyson/unsigned_long.h" namespace "NCYson":
+ PyTypeObject PyUnsignedLong_Type
+
+ type PreparePyUIntType(object)
+ object ConstructPyNumberFromUint(uint64_t)
+
+
+cdef struct PycStringIO_CAPI:
+ int (*cread)(object, char **, Py_ssize_t) except -1
+ int (*creadline)(object, char **) except -1
+ int (*cwrite)(object, const char *, Py_ssize_t) except -1
+ bytes (*cgetvalue)(object)
+ object (*NewOutput)(int)
+ object (*NewInput)(object)
+ void* InputType
+ void* OutputType
+
+
+# set numpy aliases for possible use
+cdef np = None
+cdef np_import_failed = None
+cdef npy_generic = None
+cdef npy_integers = None
+cdef npy_uintegers = None
+cdef npy_floats = None
+
+
+cdef PycStringIO_CAPI* cStringIO_CAPI
+if PY2:
+ cStringIO_CAPI = <PycStringIO_CAPI*>cpython.pycapsule.PyCapsule_Import(
+ "cStringIO.cStringIO_CAPI", 0
+ )
+
+
+class UnableToSerializeError(TypeError):
+    """Error describing an object that could not be serialized.
+
+    The offending object is kept in ``self.value``.
+    """
+
+    def __init__(self, value):
+        super(UnableToSerializeError, self).__init__(value)
+        self.value = value
+
+    def __str__(self):
+        template = "Unable to serialize an object of type {!r}: {!r}"
+        return template.format(type(self.value), self.value)
+
+
+def _yson_repr(obj):
+    """Return ``dumps(obj)`` converted to the native string type."""
+    serialized = dumps(obj)
+    return ConvertPyStringToPyNativeString(serialized)
+
+
+UInt = PreparePyUIntType(_yson_repr)
+
+
+class YsonEntity(object):
+    """YSON entity value carrying optional ``attributes``."""
+
+    def __init__(self, attributes=None):
+        self.attributes = attributes
+
+    def __repr__(self):
+        return _yson_repr(self)
+
+    def __eq__(self, other):
+        # Only exact YsonEntity instances compare equal, and only when their
+        # attributes match.
+        return type(other) is YsonEntity and self.attributes == other.attributes
+
+    def __ne__(self, other):
+        # Python 2 does not derive `!=` from `__eq__`; without this method two
+        # equal entities would also compare as "not equal".
+        return not self.__eq__(other)
+
+
+class YsonString(bytes):
+    """``bytes`` subclass with an extra ``attributes`` field."""
+
+    def __new__(cls, value, attributes=None):
+        # bytes is immutable, so the payload must be supplied in __new__.
+        raw = ConvertPyStringToPyBytes(value)
+        instance = bytes.__new__(cls, raw)
+        instance.attributes = attributes
+        return instance
+
+    def __repr__(self):
+        return _yson_repr(self)
+
+
+class YsonInt64(int):
+    """``int`` subclass with an extra ``attributes`` field."""
+
+    def __new__(cls, value, attributes=None):
+        # int is immutable, so the value must be supplied in __new__.
+        instance = int.__new__(cls, value)
+        instance.attributes = attributes
+        return instance
+
+    def __repr__(self):
+        return _yson_repr(self)
+
+
+class YsonUInt64(long):
+    """``long`` subclass with an extra ``attributes`` field."""
+
+    def __new__(cls, value, attributes=None):
+        # long is immutable, so the value must be supplied in __new__.
+        instance = long.__new__(cls, value)
+        instance.attributes = attributes
+        return instance
+
+    def __repr__(self):
+        return _yson_repr(self)
+
+
+class YsonFloat64(float):
+    """``float`` subclass with an extra ``attributes`` field."""
+
+    def __new__(cls, value, attributes=None):
+        # float is immutable, so the value must be supplied in __new__.
+        instance = float.__new__(cls, value)
+        instance.attributes = attributes
+        return instance
+
+    def __repr__(self):
+        return _yson_repr(self)
+
+
+class YsonBoolean(int):
+    """``int`` subclass holding ``bool(value)`` plus an ``attributes`` field."""
+
+    def __new__(cls, value, attributes=None):
+        # The stored integer is the truth value of `value`.
+        truth = bool(value)
+        instance = int.__new__(cls, truth)
+        instance.attributes = attributes
+        return instance
+
+    def __repr__(self):
+        return _yson_repr(self)
+
+
+class YsonList(list):
+    """``list`` subclass with an extra ``attributes`` field.
+
+    :param value: Iterable used to populate the list.
+    :param attributes: Object stored as ``self.attributes`` (default ``None``).
+    """
+
+    def __init__(self, value, attributes=None):
+        # list is mutable and is filled in by list.__init__, which accepts at
+        # most one argument. `attributes` therefore has to be consumed here
+        # rather than in __new__; otherwise YsonList(value, attributes) raises
+        # TypeError when list.__init__ receives both arguments.
+        super(YsonList, self).__init__(value)
+        self.attributes = attributes
+
+    def __repr__(self):
+        return _yson_repr(self)
+
+
+class YsonMap(dict):
+    """``dict`` subclass with an extra ``attributes`` field.
+
+    :param value: Mapping or iterable of pairs used to populate the dict.
+    :param attributes: Object stored as ``self.attributes`` (default ``None``).
+    """
+
+    def __init__(self, value, attributes=None):
+        # dict is mutable and is filled in by dict.__init__. If `attributes`
+        # reaches dict.__init__ it either raises TypeError (positional) or is
+        # silently inserted as an "attributes" key (keyword), so it has to be
+        # consumed here rather than in __new__.
+        super(YsonMap, self).__init__(value)
+        self.attributes = attributes
+
+    def __repr__(self):
+        return _yson_repr(self)
+
+
+# Input
+
+@cython.freelist(16)
+cdef class InputStream:
+    """YSON input stream adaptor.
+
+    Provides means for YSON Reader to read raw bytes from some stream.
+
+    A proper way to construct an InputStream instance is to use
+    one of its static constructor methods
+    (:meth:`from_string`, :meth:`from_file`, etc).
+
+    """
+
+    # PyCapsule wrapping the underlying C ``yson_input_stream*``.
+    cdef object capsule
+    # Python object stored alongside the capsule (buffer owner, file object or
+    # holder). Presumably kept so that the memory/object the C stream reads
+    # from stays alive -- TODO confirm.
+    cdef object data
+
+    def __cinit__(self, capsule, data=None):
+        self.capsule = capsule
+        self.data = data
+
+    # Returns the C stream pointer stored in the capsule.
+    cdef C.yson_input_stream* ptr(self):
+        return _c_open_yson_input_stream_capsule(self.capsule)
+
+    @staticmethod
+    def from_string(value):
+        """Read from a contiguous memory buffer.
+
+        :param value: Input memory buffer: any object implementing char buffer protocol (i.e. str/bytes).
+
+        """
+
+        cdef const char* data
+        cdef size_t size
+        # `holder` is stored in the resulting InputStream together with the
+        # capsule.
+        cdef object holder = GetCharBufferAndOwner(value, &data, &size)
+
+        capsule = _c_make_yson_input_stream_capsule(
+            C.yson_input_stream_from_string(data, size)
+        )
+
+        return InputStream.__new__(InputStream, capsule, holder)
+
+    @staticmethod
+    def from_file(file_obj, int buffer_size=65536):
+        """Read from an arbitrary file-like object.
+
+        Two special cases are available within CPython:
+         - Builtin python file objects (effectively ``FILE*`` wrappers)
+         - :class:`cStringIO.StringIO` objects
+
+        In other cases ``file_obj`` is required to have a read() method
+        accepting integer argument.
+
+        :param file_obj: File-like object to read from.
+        :param buffer_size: Buffer size passed to the ``FILE*``-based stream,
+            or the chunk size requested from ``read()`` in the general case.
+
+        """
+
+        cdef libc.stdio.FILE* c_file
+
+        # Special case for cStringIO streams, both types support reading
+        t = type(file_obj)
+        if cStringIO_CAPI and (
+            t is <object>cStringIO_CAPI.InputType or
+            t is <object>cStringIO_CAPI.OutputType
+        ):
+            yson_input_stream = C.yson_input_stream_new(
+                <void*>file_obj,
+                _c_yson_input_stream_cstringio_read,
+            )
+        # Special case for builtin file objects (effectively FILE* wrappers)
+        elif PyFile_CheckExact(file_obj):
+            yson_input_stream = C.yson_input_stream_from_file(
+                PyFile_AsFile(file_obj), buffer_size
+            )
+        # General case: use Python read(<size>) method
+        else:
+            # From here on `file_obj` refers to the holder wrapping the
+            # original object; the holder is what gets passed as callback
+            # context and stored in the InputStream.
+            file_obj = _ReadableHolder.__new__(
+                _ReadableHolder, file_obj, buffer_size
+            )
+            yson_input_stream = C.yson_input_stream_new(
+                <void*>file_obj,
+                _c_yson_input_stream_generic_read,
+            )
+
+        return InputStream.__new__(
+            InputStream,
+            _c_make_yson_input_stream_capsule(yson_input_stream),
+            file_obj,
+        )
+
+    @staticmethod
+    def from_fd(int fd, int buffer_size=65536):
+        """Read from a POSIX file descriptor.
+
+        :param fd: File descriptor number.
+        :param buffer_size: InputStream internal buffer size.
+
+        """
+
+        capsule = _c_make_yson_input_stream_capsule(
+            C.yson_input_stream_from_fd(fd, buffer_size)
+        )
+        return InputStream.__new__(InputStream, capsule)
+
+    @staticmethod
+    def from_iter(iter_obj):
+        """Read from a chunked input stream.
+
+        :param iter_obj: An iterator which yields a series
+            of contiguous memory buffer objects.
+
+        >>> from cyson import InputStream, Reader
+        >>> def stream():
+        ...     yield '['
+        ...     yield '123;'
+        ...     yield 'hel'
+        ...     yield 'lo;'
+        ...     yield 'world;'
+        ...     yield '#;'
+        ...     yield ']'
+        ...
+        >>> Reader(InputStream.from_iter(stream()), mode='node').node()
+        [123, 'hello', 'world', None]
+
+        """
+
+        # The holder is both the callback context and the object stored in the
+        # resulting InputStream.
+        holder = _IteratorHolder.__new__(_IteratorHolder, iter_obj)
+        capsule = _c_make_yson_input_stream_capsule(
+            C.yson_input_stream_new(<void*> holder, _c_yson_input_stream_next)
+        )
+        return InputStream.__new__(InputStream, capsule, holder)
+
+
+cdef void _c_destroy_yson_input_stream_capsule(object capsule):
+ cdef C.yson_input_stream* c_stream
+ c_stream = _c_open_yson_input_stream_capsule(capsule)
+ C.yson_input_stream_delete(c_stream)
+
+
+cdef inline C.yson_input_stream* _c_open_yson_input_stream_capsule(object capsule):
+    # Fetch the pointer stored under the "yson_input_stream" capsule name
+    # and cast it back to the C stream type.
+    cdef void* raw_stream = cpython.pycapsule.PyCapsule_GetPointer(
+        capsule, "yson_input_stream"
+    )
+    return <C.yson_input_stream*> raw_stream
+
+
+cdef inline object _c_make_yson_input_stream_capsule(C.yson_input_stream* c_stream):
+ # Wrap a C input stream pointer into a PyCapsule named "yson_input_stream".
+ # The registered destructor deletes the C stream.
+ return cpython.pycapsule.PyCapsule_New(
+ c_stream,
+ "yson_input_stream",
+ _c_destroy_yson_input_stream_capsule,
+ )
+
+
+cdef class _IteratorHolder:
+    # Context object for iterator-backed input streams: stores the source
+    # iterator and the object returned by GetCharBufferAndOwner for the
+    # most recent chunk (presumably the owner of the buffer memory -- confirm
+    # against helpers.h).
+    cdef object iter_
+    cdef object last_buffer_holder_
+
+    def __cinit__(self, obj):
+        self.iter_ = obj
+        self.last_buffer_holder_ = None
+
+    cdef inline get_next_buffer(self, const char** data, size_t* size):
+        # Pull the next chunk; StopIteration from next() propagates to the caller.
+        chunk = next(self.iter_)
+        self.last_buffer_holder_ = GetCharBufferAndOwner(chunk, data, size)
+
+
+cdef class _ReadableHolder:
+    # Context object for streams backed by a Python object with read(size).
+    # The chunk size is kept both as the Python object passed to read() and
+    # as a C size_t used for comparisons.
+    cdef object fileobj_
+    cdef object chunk_size_
+    cdef size_t c_chunk_size_
+    cdef object last_buffer_holder_
+
+    def __cinit__(self, fileobj, chunk_size):
+        self.fileobj_ = fileobj
+        self.chunk_size_ = chunk_size
+        self.c_chunk_size_ = <size_t>PyLong_AsUnsignedLong(chunk_size)
+
+    cdef inline read_chunk(self, const char** data, size_t* size):
+        # Read one chunk and expose its buffer through data/size; the object
+        # returned by GetCharBufferAndOwner is stored on the holder.
+        chunk = self.fileobj_.read(self.chunk_size_)
+        self.last_buffer_holder_ = GetCharBufferAndOwner(chunk, data, size)
+
+
+cdef C.yson_input_stream_result _c_yson_input_stream_next(
+    void* ctx, const char** ptr, size_t* length
+) except C.YSON_INPUT_STREAM_RESULT_ERROR:
+    # Callback for iterator-backed streams: ``ctx`` is an _IteratorHolder.
+    # Iterator exhaustion is reported as EOF; any other exception propagates.
+    cdef _IteratorHolder holder = <_IteratorHolder>ctx
+
+    try:
+        holder.get_next_buffer(ptr, length)
+    except StopIteration:
+        return C.YSON_INPUT_STREAM_RESULT_EOF
+    return C.YSON_INPUT_STREAM_RESULT_OK
+
+
+cdef C.yson_input_stream_result _c_yson_input_stream_cstringio_read(
+    void* ctx, const char** ptr, size_t* length
+) except C.YSON_INPUT_STREAM_RESULT_ERROR:
+    """Callback for reading from cStringIO.StringIO objects.
+
+    Calls the cStringIO C API ``cread`` with a size of -1 and maps the
+    returned count to OK (positive), EOF (zero) or ERROR (otherwise).
+    """
+
+    nread = cStringIO_CAPI.cread(<object>ctx, <char**>ptr, -1)
+    length[0] = nread
+
+    if nread > 0:
+        return C.YSON_INPUT_STREAM_RESULT_OK
+    elif nread == 0:
+        return C.YSON_INPUT_STREAM_RESULT_EOF
+    return C.YSON_INPUT_STREAM_RESULT_ERROR
+
+
+cdef C.yson_input_stream_result _c_yson_input_stream_generic_read(
+    void* ctx, const char** ptr, size_t* length
+) except C.YSON_INPUT_STREAM_RESULT_ERROR:
+    """Callback for reading from arbitrary Python file-like objects.
+
+    ``ctx`` is a _ReadableHolder. One chunk is read through it and exposed
+    via ``ptr``/``length``.
+
+    Returns OK when at least one byte was read and EOF when the chunk is
+    empty. Raises RuntimeError if the object returned more bytes than were
+    requested; exceptions raised by the underlying ``read()`` propagate.
+    """
+
+    cdef _ReadableHolder holder = <_ReadableHolder>ctx
+
+    holder.read_chunk(ptr, length)
+
+    if length[0] > holder.c_chunk_size_:
+        raise RuntimeError(
+            'reading inconsistency: {} bytes were read, but only {} requested'
+            .format(length[0], holder.chunk_size_)
+        )
+
+    # ``length[0]`` is an unsigned size_t, so it is either zero or positive;
+    # no separate "negative length" error branch is needed.
+    if length[0] == 0:
+        return C.YSON_INPUT_STREAM_RESULT_EOF
+    return C.YSON_INPUT_STREAM_RESULT_OK
+
+
+# Output
+
+@cython.freelist(16)
+cdef class OutputStream:
+ """YSON output stream adaptor.
+
+ Provides means for YSON Writer to write raw bytes to some stream.
+
+ A proper way to construct an OutputStream instance is to use
+ one of its static constructor methods
+ (:meth:`from_file`, :meth:`from_fd`, :meth:`from_bytearray`).
+
+ To write into a string, use :meth:`from_file` with
+ a :class:`cStringIO.StringIO` instance.
+
+ To write into a custom stream, provide your object
+ with ``write()`` method.
+
+ """
+
+ # PyCapsule wrapping the underlying C ``yson_output_stream``.
+ cdef object capsule
+ # Python object that was handed to the C stream as a raw pointer, if any
+ # (presumably stored so that it stays alive as long as the stream does).
+ cdef object data
+
+ def __cinit__(self, capsule, data = None):
+ self.capsule = capsule
+ self.data = data
+
+ # Return the C stream pointer stored in the capsule.
+ cdef C.yson_output_stream* ptr(self):
+ return _c_open_yson_output_stream_capsule(self.capsule)
+
+ @staticmethod
+ def from_file(file_obj, int buffer_size=65536):
+ """Write to an arbitrary file-like object.
+
+ A file object is required to have a write() method accepting str/bytes
+ arguments.
+
+ Two special cases are available within CPython:
+ - Builtin python file objects (effectively ``FILE*`` wrappers).
+ - :class:`cStringIO.StringIO` objects.
+
+ For these cases, special optimized implementations are used.
+
+ :param file_obj: Destination object.
+ :param buffer_size: OutputStream internal buffer size.
+
+ """
+
+ cdef libc.stdio.FILE* c_file
+
+ # Special case for cStringIO streams
+ if cStringIO_CAPI and type(file_obj) is <object> cStringIO_CAPI.OutputType:
+ capsule = _c_make_yson_output_stream_capsule(
+ C.yson_output_stream_new(
+ <void*> file_obj,
+ _c_yson_output_stream_cstringio_write,
+ buffer_size
+ )
+ )
+ # Special case for builtin file objects (effectively FILE* wrappers)
+ elif PyFile_CheckExact(file_obj):
+ c_file = PyFile_AsFile(file_obj)
+ capsule = _c_make_yson_output_stream_capsule(
+ C.yson_output_stream_from_file(
+ c_file,
+ buffer_size
+ )
+ )
+ # General case: use python write() method
+ else:
+ capsule = _c_make_yson_output_stream_capsule(
+ C.yson_output_stream_new(
+ <void*> file_obj,
+ _c_yson_output_stream_write,
+ buffer_size
+ )
+ )
+
+ return OutputStream.__new__(OutputStream, capsule, file_obj)
+
+ @staticmethod
+ def from_fd(int fd, int buffer_size=65536):
+ """Write to a POSIX file descriptor.
+
+ :param fd: File descriptor number.
+ :param buffer_size: OutputStream internal buffer size.
+
+ """
+
+ capsule = _c_make_yson_output_stream_capsule(
+ C.yson_output_stream_from_fd(fd, buffer_size)
+ )
+ return OutputStream.__new__(OutputStream, capsule)
+
+ @staticmethod
+ def from_bytearray(bytearray dest, int buffer_size=0):
+ """Write to a bytearray; written data is appended to ``dest``.
+
+ :param dest: Destination bytearray.
+ :param buffer_size: OutputStream internal buffer size (defaults to 0).
+
+ """
+ capsule = _c_make_yson_output_stream_capsule(
+ C.yson_output_stream_new(
+ <void*> dest,
+ _c_yson_output_stream_bytearray_write,
+ buffer_size
+ )
+ )
+ return OutputStream.__new__(OutputStream, capsule, dest)
+
+
+cdef void _c_destroy_yson_output_stream_capsule(object capsule):
+    # Capsule destructor: unwrap the C output stream and delete it.
+    C.yson_output_stream_delete(_c_open_yson_output_stream_capsule(capsule))
+
+
+cdef inline C.yson_output_stream* _c_open_yson_output_stream_capsule(object capsule):
+    # Fetch the pointer stored under the "yson_output_stream" capsule name
+    # and cast it back to the C stream type.
+    cdef void* raw_stream = cpython.pycapsule.PyCapsule_GetPointer(
+        capsule, "yson_output_stream"
+    )
+    return <C.yson_output_stream*> raw_stream
+
+
+cdef inline object _c_make_yson_output_stream_capsule(C.yson_output_stream* c_stream):
+ # Wrap a C output stream pointer into a PyCapsule named "yson_output_stream".
+ # The registered destructor deletes the C stream.
+ return cpython.pycapsule.PyCapsule_New(
+ c_stream,
+ "yson_output_stream",
+ _c_destroy_yson_output_stream_capsule,
+ )
+
+
+cdef C.yson_output_stream_result _c_yson_output_stream_write(
+    void* ctx,
+    const char* ptr,
+    size_t length
+) except C.YSON_OUTPUT_STREAM_RESULT_ERROR:
+    """Callback for writing into arbitrary Python file objects.
+
+    Copies the chunk into a bytes object and passes it to ``ctx.write()``;
+    exceptions raised by ``write()`` propagate.
+    """
+
+    payload = PyBytes_FromStringAndSize(ptr, length)
+    (<object> ctx).write(payload)
+    return C.YSON_OUTPUT_STREAM_RESULT_OK
+
+
+cdef C.yson_output_stream_result _c_yson_output_stream_cstringio_write(
+    void* ctx,
+    const char* ptr,
+    size_t length
+) except C.YSON_OUTPUT_STREAM_RESULT_ERROR:
+    """Callback for writing into cStringIO.StringIO objects.
+
+    Forwards the chunk to the cStringIO C API ``cwrite``; its return value
+    is not inspected.
+    """
+
+    cStringIO_CAPI.cwrite(<object> ctx, ptr, length)
+    return C.YSON_OUTPUT_STREAM_RESULT_OK
+
+
+cdef C.yson_output_stream_result _c_yson_output_stream_bytearray_write(
+    void* ctx,
+    const char* ptr,
+    size_t length
+) except C.YSON_OUTPUT_STREAM_RESULT_ERROR:
+    """Callback for writing into bytearray objects.
+
+    Grows the bytearray passed as ``ctx`` by ``length`` bytes and copies the
+    chunk into the newly added tail.
+
+    Returns OK on success. If the bytearray cannot be resized, the error
+    value is returned with the Python exception set by the resize call.
+    """
+
+    cdef bytearray obj = <bytearray>ctx
+    cdef size_t old_length = PyByteArray_GET_SIZE(obj)
+
+    # A failed resize leaves the buffer at its old size; copying anyway
+    # would write past its end, so bail out before touching memory.
+    if PyByteArray_Resize(obj, old_length + length) < 0:
+        return C.YSON_OUTPUT_STREAM_RESULT_ERROR
+
+    libc.string.memcpy(PyByteArray_AS_STRING(obj) + old_length, ptr, length)
+
+    return C.YSON_OUTPUT_STREAM_RESULT_OK
+
+
+# Reader
+
+# Forward declaration: Reader's method signatures refer to
+# ListFragmentIterator before the class body is defined below.
+cdef class ListFragmentIterator
+
+@cython.freelist(16)
+cdef class Reader:
+ # Underlying C reader, created in __cinit__ and deleted in __dealloc__.
+ cdef C.yson_reader* c_reader
+ # Input stream the C reader was created from.
+ cdef InputStream stream
+
+ def __cinit__(self, InputStream stream not None, mode=b'node'):
+ """Create a YSON Reader for reading from ``stream``.
+
+ Attributes on values are ignored.
+
+ :param stream: Input stream object.
+ :param mode: Input stream shape: 'node', 'list_fragment', 'map_fragment'.
+ :raises ValueError: If ``mode`` is not one of the supported values.
+
+ """
+ cdef C.yson_stream_type stream_type
+
+ cdef bytes bytes_mode = ConvertPyStringToPyBytes(mode)
+
+ if bytes_mode == b'node':
+ stream_type = C.YSON_STREAM_TYPE_NODE
+ elif bytes_mode == b'list_fragment':
+ stream_type = C.YSON_STREAM_TYPE_LIST_FRAGMENT
+ elif bytes_mode == b'map_fragment':
+ stream_type = C.YSON_STREAM_TYPE_MAP_FRAGMENT
+ else:
+ raise ValueError("Invalid reader mode {!r}".format(bytes_mode))
+
+ self.c_reader = C.yson_reader_new(
+ stream.ptr(),
+ stream_type
+ )
+ self.stream = stream
+
+ def __dealloc__(self):
+ C.yson_reader_delete(self.c_reader)
+
+ # Hook for a scalar that carries attributes: an entity (None) becomes a
+ # YsonEntity holding the attributes; any other value is returned as is.
+ cdef _scalar_handler(self, value, dict attributes):
+ if value is None:
+ return YsonEntity(attributes)
+ return value
+
+ # Hook for a list that carries attributes; ``attributes`` is not used here.
+ cdef _list_handler(self, ListFragmentIterator items, dict attributes):
+ return YsonList(items)
+
+ # Hook for a map that carries attributes; ``attributes`` is not used here.
+ cdef _map_handler(self, MapFragmentIterator items, dict attributes):
+ return YsonMap(items)
+
+ # Read one complete object starting at ``event_type``; overridden by
+ # the subclasses defined in this file.
+ cdef _read_object(self, C.yson_event_type event_type):
+ return _reader_read_object(self, event_type)
+
+ def node(self):
+ """Read whole input stream as a single YSON node.
+
+ >>> from cyson import InputStream, Reader
+
+ >>> s = '{key=1;value=foo}'
+ >>> r = Reader(InputStream.from_string(s), mode='node')
+ >>> r.node() == {'value': 'foo', 'key': 1}
+ True
+
+ Invalid stream shape results in exception:
+
+ >>> s = '{key=1;value=foo}; {key=2; value=bar}'
+ >>> r = Reader(InputStream.from_string(s), mode='node')
+ >>> r.node()
+ Traceback (most recent call last):
+ ...
+ ValueError: Invalid YSON at offset 17: Expected stream end, but found ";"
+
+ """
+
+ cdef C.yson_event_type event_type
+
+ event_type = _c_yson_reader_get_next_event(self.c_reader)
+ assert event_type == C.YSON_EVENT_BEGIN_STREAM
+
+ event_type = _c_yson_reader_get_next_event(self.c_reader)
+ result = self._read_object(event_type)
+
+ event_type = _c_yson_reader_get_next_event(self.c_reader)
+ assert event_type == C.YSON_EVENT_END_STREAM
+
+ return result
+
+ def list_fragments(
+ self, process_table_index=False, process_attributes=False,
+ stop_at_key_switch=False, keep_control_records=False
+ ):
+ """Iterate over input stream as a sequence of objects.
+
+ The flags are forwarded to the returned :class:`ListFragmentIterator`;
+ ``process_table_index`` is a legacy alias that enables
+ ``process_attributes``.
+
+ >>> from cyson import InputStream, Reader
+
+ >>> s = '{key=1;value=foo}; {key=2; value=bar}'
+ >>> r = Reader(InputStream.from_string(s), mode='list_fragment')
+ >>> l = list(r.list_fragments())
+ >>> l == [{'value': 'foo', 'key': 1}, {'value': 'bar', 'key': 2}]
+ True
+
+ """
+
+ cdef C.yson_event_type event_type
+
+ event_type = _c_yson_reader_get_next_event(self.c_reader)
+ assert event_type == C.YSON_EVENT_BEGIN_STREAM
+
+ # NOTE: for backward compatibility purpose
+ process_attributes = process_attributes or process_table_index
+
+ return ListFragmentIterator(
+ self,
+ C.YSON_EVENT_END_STREAM,
+ process_attributes,
+ stop_at_key_switch,
+ keep_control_records,
+ )
+
+ def map_fragments(self):
+ """Iterate over input stream as a sequence of (key, value) pairs.
+
+ >>> from cyson import InputStream, Reader
+
+ >>> s = 'a=b; c=d; e=[1;3;4]'
+ >>> r = Reader(InputStream.from_string(s), mode='map_fragment')
+ >>> list(r.map_fragments())
+ [('a', 'b'), ('c', 'd'), ('e', [1, 3, 4])]
+
+ """
+
+ cdef C.yson_event_type event_type
+
+ event_type = _c_yson_reader_get_next_event(self.c_reader)
+ assert event_type == C.YSON_EVENT_BEGIN_STREAM
+
+ return MapFragmentIterator(self, C.YSON_EVENT_END_STREAM)
+
+
+@cython.freelist(16)
+cdef class ListFragmentIterator:
+ # Iterator over the items of a list (or list fragment) read through a
+ # Reader, ending when ``end_event`` is received. Optionally tracks the
+ # table_index / row_index / range_index / key_switch attributes found on
+ # YsonEntity items.
+ cdef Reader reader
+ cdef readonly bint at_begin
+ cdef readonly bint at_end
+ cdef bint stop_at_key_switch
+ cdef C.yson_event_type end_event
+ cdef bint process_attributes
+ cdef bint keep_control_records
+ cdef readonly int table_index
+ # row_index is exposed as base + offset; -1 means "not known yet".
+ cdef int64_t row_index_base
+ cdef int64_t row_index_offset
+ cdef int64_t range_index
+ cdef readonly bint is_key_switched
+
+ def __cinit__(
+ self,
+ Reader reader not None,
+ C.yson_event_type end_event,
+ bint process_attributes,
+ bint stop_at_key_switch,
+ bint keep_control_records
+ ):
+ self.reader = reader
+ self.at_begin = True
+ self.at_end = False
+ self.end_event = end_event
+ self.process_attributes = process_attributes
+ self.stop_at_key_switch = stop_at_key_switch
+ self.keep_control_records = keep_control_records
+ self.table_index = 0
+ self.row_index_base = -1
+ self.row_index_offset = -1
+ self.range_index = -1
+ self.is_key_switched = False
+
+ def __iter__(self):
+ return self
+
+ # Consume any remaining items and mark the iterator as finished.
+ cpdef close(self):
+ if not self.at_end:
+ for _ in self:
+ pass
+ self.at_end = True
+
+ def __next__(self):
+ cdef C.yson_event_type event_type
+ cdef bint was_at_begin = self.at_begin
+
+ if self.at_end:
+ raise StopIteration
+
+ self.at_begin = False
+ self.is_key_switched = False
+
+ event_type = _c_yson_reader_get_next_event(self.reader.c_reader)
+ if event_type == self.end_event:
+ self.at_end = True
+ raise StopIteration
+
+ value = self.reader._read_object(event_type)
+ # A YsonEntity item is treated as a control record when attribute
+ # processing is enabled: its attributes update the iterator state.
+ if self.process_attributes and isinstance(value, YsonEntity):
+ self.do_process_attributes(value.attributes)
+
+ # A key switch ends this iteration pass without setting at_end,
+ # except when it is seen on the very first call.
+ if not was_at_begin and self.stop_at_key_switch and self.is_key_switched:
+ raise StopIteration
+
+ if self.keep_control_records:
+ return value
+
+ # Control records are skipped: move on to the next item.
+ return self.__next__()
+ else:
+ self.row_index_offset += 1
+ return value
+
+ # Absolute row index, or None until a row_index attribute has been seen.
+ property row_index:
+ def __get__(self):
+ if self.row_index_base >= 0:
+ return self.row_index_base + self.row_index_offset
+
+ # Last seen range index, or None until a range_index attribute has been seen.
+ property range_index:
+ def __get__(self):
+ if self.range_index >= 0:
+ return self.range_index
+
+ # Update iterator state from a control record's attributes. Keys are
+ # looked up as bytes; absent keys leave the corresponding state unchanged.
+ cdef do_process_attributes(self, dict attributes):
+ table_index = attributes.get(b'table_index')
+ if table_index is not None:
+ self.table_index = table_index
+
+ row_index = attributes.get(b'row_index')
+ if row_index is not None:
+ self.row_index_base = row_index
+ # Reset so that the next regular row gets offset 0.
+ self.row_index_offset = -1
+
+ range_index = attributes.get(b'range_index')
+ if range_index is not None:
+ self.range_index = range_index
+
+ key_switch = attributes.get(b'key_switch')
+ if key_switch is not None:
+ self.is_key_switched = key_switch
+
+
+cdef class MapFragmentIterator:
+    # Iterator over the (key, value) pairs of a map (or map fragment) read
+    # through a Reader, ending when ``end_event`` is received.
+    cdef Reader reader
+    cdef bint at_end
+    cdef C.yson_event_type end_event
+
+    def __cinit__(self,
+                  Reader reader not None,
+                  C.yson_event_type end_event):
+        self.end_event = end_event
+        self.at_end = False
+        self.reader = reader
+
+    def __iter__(self):
+        return self
+
+    cpdef close(self):
+        # Consume any remaining pairs and mark the iterator as finished.
+        if self.at_end:
+            return
+        for _ in self:
+            pass
+        self.at_end = True
+
+    def __next__(self):
+        cdef C.yson_event_type event_type
+
+        if self.at_end:
+            raise StopIteration
+
+        event_type = _c_yson_reader_get_next_event(self.reader.c_reader)
+        if event_type == self.end_event:
+            self.at_end = True
+            raise StopIteration
+
+        # The key string is read first, then the event that starts the value.
+        item_key = _c_yson_reader_get_byte_string(self.reader.c_reader)
+        event_type = _c_yson_reader_get_next_event(self.reader.c_reader)
+        item_value = self.reader._read_object(event_type)
+        return item_key, item_value
+
+
+cdef inline bytes _c_yson_reader_get_error(C.yson_reader* c_reader):
+ # Return the C reader's current error message as a bytes object.
+ return <bytes> C.yson_reader_get_error_message(c_reader)
+
+
+cdef inline C.yson_event_type _c_yson_reader_get_next_event(
+    C.yson_reader* c_reader
+) except C.YSON_EVENT_ERROR:
+    # Advance the C reader by one event. An error event is converted into a
+    # ValueError carrying the reader's error message.
+    cdef C.yson_event_type event_type = C.yson_reader_get_next_event(c_reader)
+
+    # A propagated exception would have fired earlier
+    if event_type != C.YSON_EVENT_ERROR:
+        return event_type
+    raise ValueError(_c_yson_reader_get_error(c_reader))
+
+
+cdef object _c_yson_reader_get_scalar(C.yson_reader* c_reader):
+    # Convert the reader's current scalar into a plain Python value:
+    # entity -> None, string -> bytes. An unrecognised scalar type yields None.
+    cdef C.yson_scalar_type kind = C.yson_reader_get_scalar_type(c_reader)
+
+    if kind == C.YSON_SCALAR_ENTITY:
+        return None
+    if kind == C.YSON_SCALAR_BOOLEAN:
+        return C.yson_reader_get_boolean(c_reader)
+    if kind == C.YSON_SCALAR_INT64:
+        return PyInt_FromLong(C.yson_reader_get_int64(c_reader))
+    if kind == C.YSON_SCALAR_UINT64:
+        return ConstructPyNumberFromUint(C.yson_reader_get_uint64(c_reader))
+    if kind == C.YSON_SCALAR_FLOAT64:
+        return C.yson_reader_get_float64(c_reader)
+    if kind == C.YSON_SCALAR_STRING:
+        return _c_yson_reader_get_byte_string(c_reader)
+    return None
+
+
+cdef inline bytes _c_yson_reader_get_byte_string(C.yson_reader* c_reader):
+    # Copy the reader's current string (pointer + length) into a bytes object.
+    cdef const C.yson_string* ref = C.yson_reader_get_string(c_reader)
+    return PyBytes_FromStringAndSize(ref.ptr, ref.length)
+
+
+cdef list _reader_read_list(Reader reader):
+    # Read items into a Python list until the END_LIST event arrives.
+    cdef C.yson_event_type event_type
+    cdef list items = []
+
+    event_type = _c_yson_reader_get_next_event(reader.c_reader)
+    while event_type != C.YSON_EVENT_END_LIST:
+        items.append(_reader_read_object(reader, event_type))
+        event_type = _c_yson_reader_get_next_event(reader.c_reader)
+    return items
+
+
+cdef dict _reader_read_map(
+    Reader reader,
+    C.yson_event_type end = C.YSON_EVENT_END_MAP,
+):
+    # Read key/value pairs into a dict (bytes keys) until the ``end`` event.
+    # ``end`` lets the same loop serve attribute blocks as well as maps.
+    cdef C.yson_event_type event_type
+    cdef dict pairs = {}
+
+    event_type = _c_yson_reader_get_next_event(reader.c_reader)
+    while event_type != end:
+        item_key = _c_yson_reader_get_byte_string(reader.c_reader)
+        event_type = _c_yson_reader_get_next_event(reader.c_reader)
+        pairs[item_key] = _reader_read_object(reader, event_type)
+        event_type = _c_yson_reader_get_next_event(reader.c_reader)
+    return pairs
+
+
+cdef _reader_read_object(
+    Reader reader,
+    C.yson_event_type event_type,
+):
+    # Read one complete object whose first event is ``event_type``.
+    # Event types not handled below yield None.
+    if event_type == C.YSON_EVENT_SCALAR:
+        return _c_yson_reader_get_scalar(reader.c_reader)
+
+    if event_type == C.YSON_EVENT_BEGIN_LIST:
+        return _reader_read_list(reader)
+
+    if event_type == C.YSON_EVENT_BEGIN_MAP:
+        return _reader_read_map(reader)
+
+    if event_type == C.YSON_EVENT_BEGIN_ATTRIBUTES:
+        # Attributes come first, followed by the value they belong to.
+        attributes = _reader_read_map(reader, C.YSON_EVENT_END_ATTRIBUTES)
+        event_type = _c_yson_reader_get_next_event(reader.c_reader)
+        return _reader_read_object_with_attributes(reader, event_type, attributes)
+
+    return None
+
+
+cdef _reader_read_object_with_attributes(
+    Reader reader,
+    C.yson_event_type event_type,
+    dict attributes,
+):
+    # Read the value that follows an attribute block and pass it, together
+    # with the attributes, to the matching Reader handler. Event types not
+    # handled below yield None.
+    cdef ListFragmentIterator l_items
+    cdef MapFragmentIterator m_items
+
+    if event_type == C.YSON_EVENT_SCALAR:
+        return reader._scalar_handler(
+            _c_yson_reader_get_scalar(reader.c_reader), attributes
+        )
+
+    if event_type == C.YSON_EVENT_BEGIN_LIST:
+        l_items = ListFragmentIterator.__new__(
+            ListFragmentIterator, reader, C.YSON_EVENT_END_LIST, False, False, False
+        )
+        wrapped = reader._list_handler(l_items, attributes)
+        # Drain anything the handler left unread so the reader ends up
+        # past the end of the list.
+        l_items.close()
+        return wrapped
+
+    if event_type == C.YSON_EVENT_BEGIN_MAP:
+        m_items = MapFragmentIterator.__new__(
+            MapFragmentIterator, reader, C.YSON_EVENT_END_MAP
+        )
+        wrapped = reader._map_handler(m_items, attributes)
+        # Same draining step as for lists.
+        m_items.close()
+        return wrapped
+
+    return None
+
+
+# StrictReader
+
+cdef class StrictReader(Reader):
+ # Reader variant whose objects are produced by _strict_reader_read_object.
+
+ cdef _read_object(self, C.yson_event_type event_type):
+ return _strict_reader_read_object(self, event_type)
+
+
+cdef object _c_yson_reader_get_scalar_strict(C.yson_reader* c_reader):
+    # Convert the reader's current scalar into the matching Yson* wrapper
+    # object. An unrecognised scalar type yields None.
+    cdef C.yson_scalar_type kind = C.yson_reader_get_scalar_type(c_reader)
+
+    if kind == C.YSON_SCALAR_ENTITY:
+        return YsonEntity()
+    if kind == C.YSON_SCALAR_BOOLEAN:
+        return YsonBoolean(C.yson_reader_get_boolean(c_reader))
+    if kind == C.YSON_SCALAR_INT64:
+        return YsonInt64(C.yson_reader_get_int64(c_reader))
+    if kind == C.YSON_SCALAR_UINT64:
+        return YsonUInt64(C.yson_reader_get_uint64(c_reader))
+    if kind == C.YSON_SCALAR_FLOAT64:
+        return YsonFloat64(C.yson_reader_get_float64(c_reader))
+    if kind == C.YSON_SCALAR_STRING:
+        return YsonString(_c_yson_reader_get_byte_string(c_reader))
+    return None
+
+
+cdef _strict_reader_read_list(Reader reader):
+    # Read items into a YsonList until the END_LIST event arrives.
+    cdef C.yson_event_type event_type
+
+    items = YsonList([])
+    event_type = _c_yson_reader_get_next_event(reader.c_reader)
+    while event_type != C.YSON_EVENT_END_LIST:
+        items.append(_strict_reader_read_object(reader, event_type))
+        event_type = _c_yson_reader_get_next_event(reader.c_reader)
+    return items
+
+
+cdef _strict_reader_read_map(
+    Reader reader,
+    C.yson_event_type end = C.YSON_EVENT_END_MAP,
+):
+    # Read key/value pairs into a YsonMap (bytes keys) until the ``end`` event.
+    cdef C.yson_event_type event_type
+
+    pairs = YsonMap({})
+    event_type = _c_yson_reader_get_next_event(reader.c_reader)
+    while event_type != end:
+        item_key = _c_yson_reader_get_byte_string(reader.c_reader)
+        event_type = _c_yson_reader_get_next_event(reader.c_reader)
+        item_value = _strict_reader_read_object(reader, event_type)
+        pairs[item_key] = item_value
+        event_type = _c_yson_reader_get_next_event(reader.c_reader)
+    return pairs
+
+
+cdef _strict_reader_read_object(
+    Reader reader,
+    C.yson_event_type event_type,
+):
+    # Read one complete object as Yson* wrapper types. Event types not
+    # handled below yield None.
+    if event_type == C.YSON_EVENT_SCALAR:
+        return _c_yson_reader_get_scalar_strict(reader.c_reader)
+
+    if event_type == C.YSON_EVENT_BEGIN_LIST:
+        return _strict_reader_read_list(reader)
+
+    if event_type == C.YSON_EVENT_BEGIN_MAP:
+        return _strict_reader_read_map(reader)
+
+    if event_type == C.YSON_EVENT_BEGIN_ATTRIBUTES:
+        # Read the attribute block, then the value, and attach a dict copy
+        # of the attributes to that value.
+        attributes = _strict_reader_read_map(reader, C.YSON_EVENT_END_ATTRIBUTES)
+        event_type = _c_yson_reader_get_next_event(reader.c_reader)
+        target = _strict_reader_read_object(reader, event_type)
+        target.attributes = PyDict_Copy(attributes)
+        return target
+
+    return None
+
+
+# UnicodeReader
+
+cdef class UnicodeReader(Reader):
+ # Reader variant whose objects are produced by _unicode_reader_read_object.
+
+ cdef _read_object(self, C.yson_event_type event_type):
+ return _unicode_reader_read_object(self, event_type)
+
+
+cdef inline unicode _c_yson_reader_get_unicode_string(C.yson_reader* c_reader):
+    # Decode the reader's current string as UTF-8 (no explicit error mode).
+    cdef const C.yson_string* ref = C.yson_reader_get_string(c_reader)
+    return PyUnicode_DecodeUTF8(ref.ptr, ref.length, NULL)
+
+
+cdef object _c_yson_unicode_reader_get_scalar(C.yson_reader* c_reader):
+    # Convert the reader's current scalar into a plain Python value:
+    # entity -> None, string -> unicode. An unrecognised scalar type yields None.
+    cdef C.yson_scalar_type kind = C.yson_reader_get_scalar_type(c_reader)
+
+    if kind == C.YSON_SCALAR_ENTITY:
+        return None
+    if kind == C.YSON_SCALAR_BOOLEAN:
+        return C.yson_reader_get_boolean(c_reader)
+    if kind == C.YSON_SCALAR_INT64:
+        return PyInt_FromLong(C.yson_reader_get_int64(c_reader))
+    if kind == C.YSON_SCALAR_UINT64:
+        return ConstructPyNumberFromUint(C.yson_reader_get_uint64(c_reader))
+    if kind == C.YSON_SCALAR_FLOAT64:
+        return C.yson_reader_get_float64(c_reader)
+    if kind == C.YSON_SCALAR_STRING:
+        return _c_yson_reader_get_unicode_string(c_reader)
+    return None
+
+
+cdef list _unicode_reader_read_list(Reader reader):
+    # Read items into a Python list until the END_LIST event arrives.
+    cdef C.yson_event_type event_type
+    cdef list items = []
+
+    event_type = _c_yson_reader_get_next_event(reader.c_reader)
+    while event_type != C.YSON_EVENT_END_LIST:
+        items.append(_unicode_reader_read_object(reader, event_type))
+        event_type = _c_yson_reader_get_next_event(reader.c_reader)
+    return items
+
+
+cdef dict _unicode_reader_read_map(
+    Reader reader,
+    C.yson_event_type end = C.YSON_EVENT_END_MAP,
+):
+    # Read key/value pairs into a dict (unicode keys) until the ``end`` event.
+    cdef C.yson_event_type event_type
+    cdef dict pairs = {}
+
+    event_type = _c_yson_reader_get_next_event(reader.c_reader)
+    while event_type != end:
+        item_key = _c_yson_reader_get_unicode_string(reader.c_reader)
+        event_type = _c_yson_reader_get_next_event(reader.c_reader)
+        pairs[item_key] = _unicode_reader_read_object(reader, event_type)
+        event_type = _c_yson_reader_get_next_event(reader.c_reader)
+    return pairs
+
+
+cdef _unicode_reader_read_object_with_attributes(
+    Reader reader,
+    C.yson_event_type event_type,
+    dict attributes,
+):
+    # Read the value that follows an attribute block and pass it, together
+    # with the attributes, to the matching Reader handler. Event types not
+    # handled below yield None.
+    cdef ListFragmentIterator l_items
+    cdef MapFragmentIterator m_items
+
+    if event_type == C.YSON_EVENT_SCALAR:
+        return reader._scalar_handler(
+            _c_yson_unicode_reader_get_scalar(reader.c_reader), attributes
+        )
+
+    if event_type == C.YSON_EVENT_BEGIN_LIST:
+        l_items = ListFragmentIterator.__new__(
+            ListFragmentIterator, reader, C.YSON_EVENT_END_LIST, False, False, False
+        )
+        wrapped = reader._list_handler(l_items, attributes)
+        # Drain anything the handler left unread so the reader ends up
+        # past the end of the list.
+        l_items.close()
+        return wrapped
+
+    if event_type == C.YSON_EVENT_BEGIN_MAP:
+        m_items = MapFragmentIterator.__new__(
+            MapFragmentIterator, reader, C.YSON_EVENT_END_MAP
+        )
+        wrapped = reader._map_handler(m_items, attributes)
+        # Same draining step as for lists.
+        m_items.close()
+        return wrapped
+
+    return None
+
+
+cdef _unicode_reader_read_object(
+    Reader reader,
+    C.yson_event_type event_type,
+):
+    # Read one complete object, decoding strings to unicode. Event types not
+    # handled below yield None.
+    if event_type == C.YSON_EVENT_SCALAR:
+        return _c_yson_unicode_reader_get_scalar(reader.c_reader)
+
+    if event_type == C.YSON_EVENT_BEGIN_LIST:
+        return _unicode_reader_read_list(reader)
+
+    if event_type == C.YSON_EVENT_BEGIN_MAP:
+        return _unicode_reader_read_map(reader)
+
+    if event_type == C.YSON_EVENT_BEGIN_ATTRIBUTES:
+        # The attribute block is read with the bytes-keyed map reader.
+        attributes = _reader_read_map(reader, C.YSON_EVENT_END_ATTRIBUTES)
+        event_type = _c_yson_reader_get_next_event(reader.c_reader)
+        return _unicode_reader_read_object_with_attributes(
+            reader, event_type, attributes)
+
+    return None
+
+
+# Writer
+
+@cython.freelist(16)
+cdef class Writer:
+ # YSON writer bound to an OutputStream. Every public method returns
+ # ``self`` so that calls can be chained.
+
+ # Underlying C writer, created in __cinit__ and deleted in __dealloc__.
+ cdef C.yson_writer* c_writer
+ # Output stream the C writer was created from.
+ cdef OutputStream stream
+
+ # Parameters:
+ #   stream -- destination OutputStream (must not be None)
+ #   format -- 'text', 'pretty' or 'binary'
+ #   mode   -- 'node', 'list_fragment' or 'map_fragment'
+ #   indent -- indentation width, passed on only for the 'pretty' format
+ # Raises ValueError for an unknown mode or format.
+ def __cinit__(
+ self,
+ OutputStream stream not None,
+ format=b'text',
+ mode=b'node',
+ int indent=4
+ ):
+ cdef C.yson_stream_type stream_type
+
+ cdef bytes bytes_mode = ConvertPyStringToPyBytes(mode)
+ cdef bytes bytes_format = ConvertPyStringToPyBytes(format)
+
+ if bytes_mode == b'node':
+ stream_type = C.YSON_STREAM_TYPE_NODE
+ elif bytes_mode == b'list_fragment':
+ stream_type = C.YSON_STREAM_TYPE_LIST_FRAGMENT
+ elif bytes_mode == b'map_fragment':
+ stream_type = C.YSON_STREAM_TYPE_MAP_FRAGMENT
+ else:
+ raise ValueError("Invalid writer mode {!r}".format(bytes_mode))
+
+ if bytes_format == b'text':
+ self.c_writer = C.yson_writer_new_text(
+ stream.ptr(),
+ stream_type,
+ )
+ elif bytes_format == b'pretty':
+ self.c_writer = C.yson_writer_new_pretty_text(
+ stream.ptr(),
+ stream_type,
+ indent,
+ )
+ elif bytes_format == b'binary':
+ self.c_writer = C.yson_writer_new_binary(
+ stream.ptr(),
+ stream_type,
+ )
+ else:
+ raise ValueError("Bad YSON format {!r}".format(bytes_format))
+
+ self.stream = stream
+
+ def __dealloc__(self):
+ C.yson_writer_delete(self.c_writer)
+
+ # --- Low-level event methods: each emits exactly one writer event. ---
+
+ def begin_stream(self):
+ _c_writer_begin_stream(self.c_writer)
+ return self
+
+ def end_stream(self):
+ _c_writer_end_stream(self.c_writer)
+ return self
+
+ def begin_list(self):
+ _c_writer_begin_list(self.c_writer)
+ return self
+
+ def end_list(self):
+ _c_writer_end_list(self.c_writer)
+ return self
+
+ def begin_map(self):
+ _c_writer_begin_map(self.c_writer)
+ return self
+
+ def end_map(self):
+ _c_writer_end_map(self.c_writer)
+ return self
+
+ def begin_attributes(self):
+ _c_writer_begin_attributes(self.c_writer)
+ return self
+
+ def end_attributes(self):
+ _c_writer_end_attributes(self.c_writer)
+ return self
+
+ def entity(self):
+ _c_writer_entity(self.c_writer)
+ return self
+
+ def key(self, value):
+ _c_writer_key(self.c_writer, value)
+ return self
+
+ # Write a string value: bytes and unicode take dedicated paths, anything
+ # else goes through the generic buffer-based fallback.
+ def string(self, value):
+ if isinstance(value, bytes):
+ _c_writer_bytes(self.c_writer, <bytes>value)
+ elif isinstance(value, unicode):
+ _c_writer_unicode(self.c_writer, <unicode>value)
+ else:
+ _c_writer_string(self.c_writer, value)
+
+ return self
+
+ def int64(self, int64_t value):
+ _c_writer_int64(self.c_writer, value)
+ return self
+
+ def uint64(self, uint64_t value):
+ _c_writer_uint64(self.c_writer, value)
+ return self
+
+ def boolean(self, bint value):
+ _c_writer_boolean(self.c_writer, value)
+ return self
+
+ def float64(self, double value):
+ _c_writer_float64(self.c_writer, value)
+ return self
+
+ # --- Composite helpers: write a whole attribute block, list or map. ---
+
+ # Write ``attrs`` (a dict or any object with keys() and item access)
+ # as an attribute block.
+ def attributes(self, attrs):
+ _c_writer_begin_attributes(self.c_writer)
+
+ if isinstance(attrs, dict):
+ self._dict_common(<dict>attrs)
+ else:
+ self._mapping_common(attrs)
+
+ _c_writer_end_attributes(self.c_writer)
+
+ return self
+
+ # Write every item of the iterable ``obj`` as a YSON list.
+ def list(self, obj):
+ _c_writer_begin_list(self.c_writer)
+ for item in obj:
+ self.write(item)
+ _c_writer_end_list(self.c_writer)
+ return self
+
+ # Write ``obj`` (a dict or any object with keys() and item access)
+ # as a YSON map.
+ def map(self, obj):
+ _c_writer_begin_map(self.c_writer)
+
+ if isinstance(obj, dict):
+ self._dict_common(<dict>obj)
+ else:
+ self._mapping_common(obj)
+
+ _c_writer_end_map(self.c_writer)
+
+ return self
+
+ # Write an arbitrary Python object, dispatching on its type.
+ cpdef write(self, obj):
+ _c_writer_write(self.c_writer, obj)
+ return self
+
+ # Write an entity carrying a ``table_index`` attribute.
+ def switch_table(self, int index):
+ self.write(YsonEntity({'table_index': index}))
+ return self
+
+ # Write the key/value pairs of a dict; values go through self.write().
+ cdef inline _dict_common(self, dict obj):
+ cdef PyObject* key
+ cdef PyObject* value
+ cdef Py_ssize_t pos = 0
+
+ while PyDict_Next(obj, &pos, &key, &value):
+ _c_writer_key(self.c_writer, <object>key)
+ self.write(<object>value)
+
+ # Same as _dict_common, for objects that only offer keys() and indexing.
+ cdef inline _mapping_common(self, obj):
+ for key in obj.keys():
+ _c_writer_key(self.c_writer, key)
+ self.write(obj[key])
+
+
+cdef inline int _c_writer_check_exc(C.yson_writer_result result) except? 0:
+    # Returns 1 for an OK result and 0 otherwise. Because of ``except? 0``,
+    # a 0 return makes the caller check for a pending Python exception and
+    # propagate it if there is one.
+    if result != C.YSON_WRITER_RESULT_OK:
+        return 0
+    return 1
+
+
+cdef inline _c_writer_check(
+    C.yson_writer* c_writer,
+    C.yson_writer_result result
+):
+    # Turn a writer result code into a Python exception when needed.
+    # A Python exception that is already pending is propagated first;
+    # otherwise BAD_STREAM raises RuntimeError and ERROR raises IOError,
+    # both carrying the writer's error message.
+    _c_writer_check_exc(result)
+
+    if result == C.YSON_WRITER_RESULT_BAD_STREAM:
+        raise RuntimeError(C.yson_writer_get_error_message(c_writer))
+    if result == C.YSON_WRITER_RESULT_ERROR:
+        raise IOError(C.yson_writer_get_error_message(c_writer))
+
+
+cdef _c_writer_begin_stream(C.yson_writer* c_writer):
+    # Emit the begin-stream event; failures raise through _c_writer_check.
+    _c_writer_check(c_writer, C.yson_writer_write_begin_stream(c_writer))
+
+
+cdef _c_writer_end_stream(C.yson_writer* c_writer):
+    # Emit the end-stream event; failures raise through _c_writer_check.
+    _c_writer_check(c_writer, C.yson_writer_write_end_stream(c_writer))
+
+
+cdef _c_writer_begin_list(C.yson_writer* c_writer):
+    # Emit the begin-list event; failures raise through _c_writer_check.
+    _c_writer_check(c_writer, C.yson_writer_write_begin_list(c_writer))
+
+
+cdef _c_writer_end_list(C.yson_writer* c_writer):
+    # Emit the end-list event; failures raise through _c_writer_check.
+    _c_writer_check(c_writer, C.yson_writer_write_end_list(c_writer))
+
+
+cdef _c_writer_begin_map(C.yson_writer* c_writer):
+    # Emit the begin-map event; failures raise through _c_writer_check.
+    _c_writer_check(c_writer, C.yson_writer_write_begin_map(c_writer))
+
+
+cdef _c_writer_end_map(C.yson_writer* c_writer):
+    # Emit the end-map event; failures raise through _c_writer_check.
+    _c_writer_check(c_writer, C.yson_writer_write_end_map(c_writer))
+
+
+cdef _c_writer_begin_attributes(C.yson_writer* c_writer):
+    # Emit the begin-attributes event; failures raise through _c_writer_check.
+    _c_writer_check(c_writer, C.yson_writer_write_begin_attributes(c_writer))
+
+
+cdef _c_writer_end_attributes(C.yson_writer* c_writer):
+    # Emit the end-attributes event; failures raise through _c_writer_check.
+    _c_writer_check(c_writer, C.yson_writer_write_end_attributes(c_writer))
+
+
+cdef _c_writer_entity(C.yson_writer* c_writer):
+    # Emit an entity value; failures raise through _c_writer_check.
+    _c_writer_check(c_writer, C.yson_writer_write_entity(c_writer))
+
+
+# fallback if key is not bytes or unicode
+cdef _c_writer_key(C.yson_writer* c_writer, value):
+    # Write ``value`` as a map key using its raw character buffer.
+    # ``holder`` is not read again; presumably it keeps the buffer alive
+    # for the duration of the write -- confirm against helpers.h.
+    cdef const char* data
+    cdef size_t size
+    cdef object holder = GetCharBufferAndOwner(value, &data, &size)
+
+    _c_writer_check(c_writer, C.yson_writer_write_key(c_writer, data, size))
+
+
+# fallback if string-like object is not bytes or unicode
+cdef _c_writer_string(C.yson_writer* c_writer, value):
+    # Write ``value`` as a string using its raw character buffer.
+    # ``holder`` is not read again; presumably it keeps the buffer alive
+    # for the duration of the write -- confirm against helpers.h.
+    cdef const char* data
+    cdef size_t size
+    cdef object holder = GetCharBufferAndOwner(value, &data, &size)
+
+    _c_writer_check(c_writer, C.yson_writer_write_string(c_writer, data, size))
+
+
+cdef _c_writer_key_bytes(C.yson_writer* c_writer, bytes value):
+    # Write a bytes object as a map key, straight from its internal buffer.
+    cdef C.yson_writer_result result = C.yson_writer_write_key(
+        c_writer, PyBytes_AS_STRING(value), PyBytes_GET_SIZE(value)
+    )
+    _c_writer_check(c_writer, result)
+
+
+cdef inline _c_writer_key_unicode(C.yson_writer* c_writer, unicode value):
+    # Encode the key as UTF-8 and write the resulting bytes.
+    cdef bytes encoded = PyUnicode_AsUTF8String(value)
+    _c_writer_key_bytes(c_writer, encoded)
+
+
+cdef _c_writer_bytes(C.yson_writer* c_writer, bytes value):
+    # Write a bytes object as a string value, straight from its internal buffer.
+    cdef C.yson_writer_result result = C.yson_writer_write_string(
+        c_writer, PyBytes_AS_STRING(value), PyBytes_GET_SIZE(value)
+    )
+    _c_writer_check(c_writer, result)
+
+
+cdef inline _c_writer_unicode(C.yson_writer* c_writer, unicode value):
+    # Encode the string as UTF-8 and write the resulting bytes.
+    cdef bytes encoded = PyUnicode_AsUTF8String(value)
+    _c_writer_bytes(c_writer, encoded)
+
+
+cdef _c_writer_int64(C.yson_writer* c_writer, int64_t value):
+    # Write a signed 64-bit integer; failures raise through _c_writer_check.
+    _c_writer_check(c_writer, C.yson_writer_write_int64(c_writer, value))
+
+
+cdef _c_writer_uint64(C.yson_writer* c_writer, uint64_t value):
+    # Write an unsigned 64-bit integer; failures raise through _c_writer_check.
+    _c_writer_check(c_writer, C.yson_writer_write_uint64(c_writer, value))
+
+
+cdef _c_writer_boolean(C.yson_writer* c_writer, bint value):
+    # Write a boolean; failures raise through _c_writer_check.
+    _c_writer_check(c_writer, C.yson_writer_write_boolean(c_writer, value))
+
+
+cdef _c_writer_float64(C.yson_writer* c_writer, double value):
+    # Write a double; failures raise through _c_writer_check.
+    _c_writer_check(c_writer, C.yson_writer_write_float64(c_writer, value))
+
+
+cdef _c_writer_list(C.yson_writer* c_writer, list value):
+    # Write a Python list as a YSON list, item by item.
+    cdef Py_ssize_t index
+    cdef Py_ssize_t count
+
+    _c_writer_begin_list(c_writer)
+
+    # The size is sampled once, after the opening event has been written.
+    count = PyList_GET_SIZE(value)
+    index = 0
+    while index < count:
+        _c_writer_write(c_writer, <object>PyList_GET_ITEM(value, index))
+        index += 1
+
+    _c_writer_end_list(c_writer)
+
+
+cdef _c_writer_tuple(C.yson_writer* c_writer, tuple value):
+    # Write a Python tuple as a YSON list, item by item.
+    cdef Py_ssize_t index
+    cdef Py_ssize_t count
+
+    _c_writer_begin_list(c_writer)
+
+    # The size is sampled once, after the opening event has been written.
+    count = PyTuple_GET_SIZE(value)
+    index = 0
+    while index < count:
+        _c_writer_write(c_writer, <object>PyTuple_GET_ITEM(value, index))
+        index += 1
+
+    _c_writer_end_list(c_writer)
+
+
+cdef inline _c_writer_dict_common(C.yson_writer* c_writer, dict mapping):
+ # Write every key/value pair of a dict. No begin/end events are written
+ # here: the callers wrap this in a map or an attribute block.
+ cdef PyObject* key
+ cdef PyObject* value
+ cdef Py_ssize_t pos = 0
+
+ while PyDict_Next(mapping, &pos, &key, &value):
+ # bytes and unicode keys take dedicated paths; anything else goes
+ # through the generic buffer-based key writer.
+ if PyBytes_Check(<object>key):
+ _c_writer_key_bytes(c_writer, <bytes>key)
+ elif PyUnicode_Check(<object>key):
+ _c_writer_key_unicode(c_writer, <unicode>key)
+ else:
+ _c_writer_key(c_writer, <object>key)
+
+ _c_writer_write(c_writer, <object>value)
+
+
+# fallback if attributes is not dict
+cdef inline _c_writer_mapping_common(C.yson_writer* c_writer, mapping):
+    # Write every key/value pair of a mapping-like object that offers
+    # keys() and item access. No begin/end events are written here: the
+    # caller wraps this in an attribute block.
+    #
+    # A regular ``for`` loop is used so that references to the keys are
+    # managed automatically, including when writing a key or value raises,
+    # and so that an exception raised by the keys iterator itself propagates.
+    for key in mapping.keys():
+        # bytes and unicode keys take dedicated paths; anything else goes
+        # through the generic buffer-based key writer.
+        if PyBytes_Check(key):
+            _c_writer_key_bytes(c_writer, <bytes>key)
+        elif PyUnicode_Check(key):
+            _c_writer_key_unicode(c_writer, <unicode>key)
+        else:
+            _c_writer_key(c_writer, key)
+
+        _c_writer_write(c_writer, mapping[key])
+
+
+cdef _c_writer_dict(C.yson_writer* c_writer, dict value):
+    # Write a dict as a YSON map: open the map, write the pairs, close it.
+    _c_writer_begin_map(c_writer)
+    _c_writer_dict_common(c_writer, value)
+    _c_writer_end_map(c_writer)
+
+
+cdef _c_writer_attributes(C.yson_writer* c_writer, value):
+    # Write `value` as an attributes section. Dicts take the direct path;
+    # any other object goes through the generic mapping fallback.
+    _c_writer_begin_attributes(c_writer)
+
+    if not PyDict_Check(value):
+        _c_writer_mapping_common(c_writer, value)
+    else:
+        _c_writer_dict_common(c_writer, <dict>value)
+
+    _c_writer_end_attributes(c_writer)
+
+
+cdef _c_writer_write(C.yson_writer* c_writer, value):
+    # Dispatch on the exact type of `value`. Subclasses and every other
+    # object are handled by `_c_writer_write_fallback`.
+    kind = type(value)
+
+    if kind is bytes:
+        _c_writer_bytes(c_writer, <bytes>value)
+        return
+    if kind is int:
+        _c_writer_int64(c_writer, value)
+        return
+    if kind is dict:
+        _c_writer_dict(c_writer, <dict>value)
+        return
+    if kind is tuple:
+        _c_writer_tuple(c_writer, <tuple>value)
+        return
+    if kind is list:
+        _c_writer_list(c_writer, <list>value)
+        return
+    if kind is float:
+        _c_writer_float64(c_writer, value)
+        return
+    if kind is type(None):
+        _c_writer_entity(c_writer)
+        return
+    if PY2 and kind is long:
+        _c_writer_uint64(c_writer, value)
+        return
+    if kind is <type>&PyUnsignedLong_Type:
+        _c_writer_uint64(c_writer, value)
+        return
+    if kind is unicode:
+        _c_writer_unicode(c_writer, <unicode>value)
+        return
+
+    _c_writer_write_fallback(c_writer, value)
+
+
+cdef _c_writer_write_fallback(C.yson_writer* c_writer, obj):
+    # Slow path of `_c_writer_write`: handles Yson* wrappers (including their
+    # attributes), subclasses of the builtin types, numpy scalars and objects
+    # that expose a buffer. Raises UnableToSerializeError for anything else.
+    if isinstance(obj, (YsonEntity, YsonString, YsonInt64, YsonUInt64,
+                        YsonFloat64, YsonBoolean, YsonList, YsonMap)):
+        if obj.attributes is not None:
+            _c_writer_attributes(c_writer, obj.attributes)
+
+    if isinstance(obj, (bytes, unicode, YsonString)):
+        _c_writer_string(c_writer, obj)
+        return
+    if isinstance(obj, (bool, YsonBoolean)):
+        _c_writer_boolean(c_writer, obj)
+        return
+    if isinstance(obj, YsonUInt64):
+        _c_writer_uint64(c_writer, obj)
+        return
+    if isinstance(obj, int):
+        _c_writer_int64(c_writer, obj)
+        return
+    if PY2 and isinstance(obj, long):
+        _c_writer_uint64(c_writer, obj)
+        return
+    if isinstance(obj, float):
+        _c_writer_float64(c_writer, obj)
+        return
+    if isinstance(obj, (type(None), YsonEntity)):
+        _c_writer_entity(c_writer)
+        return
+    if isinstance(obj, list):
+        _c_writer_list(c_writer, <list>obj)
+        return
+    if isinstance(obj, tuple):
+        _c_writer_tuple(c_writer, <tuple>obj)
+        return
+    if isinstance(obj, dict):
+        _c_writer_dict(c_writer, <dict>obj)
+        return
+
+    # numpy symbols are resolved on first use; `np_import_failed` stays
+    # None until that attempt has been made.
+    if np_import_failed is None:
+        load_numpy_symbols()
+
+    if np is not None and isinstance(obj, npy_generic):
+        if isinstance(obj, npy_integers):
+            _c_writer_int64(c_writer, obj)
+        elif isinstance(obj, npy_uintegers):
+            _c_writer_uint64(c_writer, obj)
+        elif isinstance(obj, npy_floats):
+            _c_writer_float64(c_writer, obj)
+        else:
+            raise UnableToSerializeError(obj)
+    elif GenericCheckBuffer(obj):
+        _c_writer_string(c_writer, obj)
+    else:
+        raise UnableToSerializeError(obj)
+
+
+cdef void load_numpy_symbols() except *:
+    # Try to import numpy and cache the scalar type groups used by the
+    # fallback writer. `np_import_failed` records the outcome of the attempt.
+    global np
+    global np_import_failed
+    global npy_generic
+    global npy_integers
+    global npy_uintegers
+    global npy_floats
+
+    try:
+        import numpy as np
+    except ImportError:
+        np_import_failed = True
+        return
+
+    np_import_failed = False
+
+    npy_generic = np.generic
+    npy_integers = (np.int8, np.int16, np.int32, np.int64)
+    npy_uintegers = (np.uint8, np.uint16, np.uint32, np.uint64)
+    npy_floats = (np.float16, np.float32, np.float64)
+
+
+# PyReader
+
+DEFAULT_PYREADER_SCALAR_HANDLERS = {}
+DEFAULT_PYREADER_LIST_HANDLERS = {}
+DEFAULT_PYREADER_MAP_HANDLERS = {}
+
+
+cdef class PyReader(Reader):
+    # Reader whose scalar/list/map results can be post-processed by handlers
+    # selected through the b'py' attribute of the node being read. The three
+    # handler tables default to the module-level registries.
+    cdef public dict scalar_handlers
+    cdef public dict list_handlers
+    cdef public dict map_handlers
+
+    def __cinit__(
+        self,
+        InputStream stream not None,
+        mode=b'node',
+        scalar_handlers=DEFAULT_PYREADER_SCALAR_HANDLERS,
+        list_handlers=DEFAULT_PYREADER_LIST_HANDLERS,
+        map_handlers=DEFAULT_PYREADER_MAP_HANDLERS,
+    ):
+        self.scalar_handlers = scalar_handlers
+        self.list_handlers = list_handlers
+        self.map_handlers = map_handlers
+
+    cdef _scalar_handler(self, value, dict attributes):
+        # Use a registered handler when the attributes name one, otherwise
+        # fall back to the generic behaviour.
+        custom = _pyreader_find_handler(attributes, self.scalar_handlers, b'scalar')
+        if custom is not None:
+            return custom(value)
+        return self._generic_scalar_handler(value, attributes)
+
+    cdef _list_handler(self, ListFragmentIterator items, dict attributes):
+        custom = _pyreader_find_handler(attributes, self.list_handlers, b'list')
+        if custom is not None:
+            return custom(items)
+        return self._generic_list_handler(items, attributes)
+
+    cdef _map_handler(self, MapFragmentIterator items, dict attributes):
+        custom = _pyreader_find_handler(attributes, self.map_handlers, b'map')
+        if custom is not None:
+            return custom(items)
+        return self._generic_map_handler(items, attributes)
+
+    cdef _generic_scalar_handler(self, value, dict attributes):
+        # Scalars without a handler are returned unchanged.
+        return value
+
+    cdef _generic_list_handler(self, ListFragmentIterator items, dict attributes):
+        # Lists without a handler are materialised as a Python list.
+        return list(items)
+
+    cdef _generic_map_handler(self, MapFragmentIterator items, dict attributes):
+        # Maps without a handler are materialised as a Python dict.
+        return dict(items)
+
+
+def pyreader_scalar_handler(py_type):
+    """Decorator factory: register a function in the default scalar handlers under `py_type`."""
+    def wrapper(function):
+        # Registration returns the function unchanged so it stays usable
+        # under its own name.
+        DEFAULT_PYREADER_SCALAR_HANDLERS[py_type] = function
+        return function
+
+    return wrapper
+
+
+def pyreader_list_handler(py_type):
+    """Decorator factory: register a function in the default list handlers under `py_type`."""
+    def wrapper(function):
+        # Registration returns the function unchanged so it stays usable
+        # under its own name.
+        DEFAULT_PYREADER_LIST_HANDLERS[py_type] = function
+        return function
+
+    return wrapper
+
+
+def pyreader_map_handler(py_type):
+    """Decorator factory: register a function in the default map handlers under `py_type`."""
+    def wrapper(function):
+        # Registration returns the function unchanged so it stays usable
+        # under its own name.
+        DEFAULT_PYREADER_MAP_HANDLERS[py_type] = function
+        return function
+
+    return wrapper
+
+
+cdef _pyreader_find_handler(dict attributes, dict handlers, bytes type):
+    # Look up the handler named by the b'py' attribute. Returns None when the
+    # attribute is absent and raises ValueError when it names an unregistered
+    # handler; `type` is only used in the error message.
+    requested = attributes.get(b'py')
+    if requested is None:
+        return None
+
+    found = handlers.get(requested)
+    if found is not None:
+        return found
+
+    raise ValueError("No {} handler for {}".format(type, requested))
+
+
+@pyreader_scalar_handler(b'unicode')
+def _pyreader_read_unicode(bytes value not None):
+    """Decode a UTF-8 encoded bytes value into a unicode string."""
+    cdef char* data = PyBytes_AS_STRING(value)
+    cdef Py_ssize_t length = PyBytes_GET_SIZE(value)
+    return PyUnicode_DecodeUTF8(data, length, NULL)
+
+
+@pyreader_list_handler(b'dict')
+def _pyreader_read_dict(ListFragmentIterator items not None):
+    """Rebuild a dict from a flat list of alternating keys and values."""
+    mapping = {}
+    try:
+        while True:
+            # Items arrive as key, value, key, value, ...; the loop ends
+            # when the iterator raises StopIteration.
+            current_key = next(items)
+            mapping[current_key] = next(items)
+    except StopIteration:
+        pass
+    return mapping
+
+
+# Register builtin constructors as default handlers: the decorator factories
+# are called directly, so the builtin itself becomes the handler.
+pyreader_scalar_handler(b'long')(long)
+
+pyreader_list_handler(b'list')(list)
+pyreader_list_handler(b'tuple')(tuple)
+pyreader_list_handler(b'set')(set)
+pyreader_list_handler(b'frozenset')(frozenset)
+
+
+# PyWriter
+
+DEFAULT_PYWRITER_HANDLERS = {}
+
+cdef class PyWriter(Writer):
+    # Writer that serialises Python objects through per-type handlers looked
+    # up by the exact type of the object being written.
+    cdef public dict handlers
+
+    def __cinit__(
+        self,
+        OutputStream stream not None,
+        format=b'text',
+        mode=b'node',
+        int indent=4,
+        handlers=DEFAULT_PYWRITER_HANDLERS
+    ):
+        self.handlers = handlers
+
+    cpdef py_type(self, bytes name):
+        # Emit an attributes section carrying the b'py' type marker.
+        _c_pywriter_py_type(self.c_writer, name)
+        return self
+
+    cpdef write(self, obj):
+        # Serialise `obj` with the handler registered for its exact type;
+        # raise UnableToSerializeError when there is none.
+        chosen = self.handlers.get(type(obj))
+        if chosen is not None:
+            chosen(self, obj)
+            return self
+
+        raise UnableToSerializeError(obj)
+
+def pywriter_handler(type_):
+    """Decorator factory: register a function in the default PyWriter handlers for `type_`."""
+    def wrapper(function):
+        # Registration returns the function unchanged so it stays usable
+        # under its own name.
+        DEFAULT_PYWRITER_HANDLERS[type_] = function
+        return function
+
+    return wrapper
+
+
+cdef _c_pywriter_py_type(C.yson_writer* c_writer, bytes name):
+    # Emit an attributes section with the single pair b'py' = name, which
+    # tags the node that follows with its Python type.
+    _c_writer_begin_attributes(c_writer)
+    _c_writer_key_bytes(c_writer, b'py')
+    _c_writer_bytes(c_writer, name)
+    _c_writer_end_attributes(c_writer)
+
+
+@pywriter_handler(type(None))
+def _pywriter_write_none(PyWriter writer, _):
+    """Write None as a YSON entity."""
+    _c_writer_entity(writer.c_writer)
+
+
+@pywriter_handler(bool)
+def _pywriter_write_bool(PyWriter writer, bint obj):
+    """Write a bool as a YSON boolean."""
+    _c_writer_boolean(writer.c_writer, obj)
+
+
+if PY2:
+    # Registered under Python 2 only, where `int` and `long` are different types.
+    @pywriter_handler(int)
+    def _pywriter_write_int(PyWriter writer, int64_t obj):
+        """Write a Python 2 int as a YSON int64."""
+        _c_writer_int64(writer.c_writer, obj)
+
+
+@pywriter_handler(long)
+def _pywriter_write_long(PyWriter writer, obj not None):
+    """Write a long as its decimal text, tagged with py=long."""
+    _c_pywriter_py_type(writer.c_writer, b'long')
+    _c_writer_bytes(writer.c_writer, ConvertPyLongToPyBytes(obj))
+
+
+@pywriter_handler(float)
+def _pywriter_write_float(PyWriter writer, double obj):
+    """Write a float as a YSON float64."""
+    _c_writer_float64(writer.c_writer, obj)
+
+
+@pywriter_handler(bytes)
+def _pywriter_write_bytes(PyWriter writer, bytes obj not None):
+    """Write bytes as a plain YSON string (no type tag)."""
+    _c_writer_bytes(writer.c_writer, obj)
+
+
+@pywriter_handler(unicode)
+def _pywriter_write_unicode(PyWriter writer, unicode obj not None):
+    """Write unicode text as a UTF-8 YSON string, tagged with py=unicode."""
+    _c_pywriter_py_type(writer.c_writer, b'unicode')
+    _c_writer_unicode(writer.c_writer, obj)
+
+
+cdef inline _pywriter_write_iterable(PyWriter writer, obj):
+    # Write every item of an arbitrary iterable as one YSON list, sending
+    # each item back through the writer's handler dispatch.
+    _c_writer_begin_list(writer.c_writer)
+    for element in obj:
+        writer.write(element)
+    _c_writer_end_list(writer.c_writer)
+
+
+@pywriter_handler(list)
+def _pywriter_write_list(PyWriter writer, list obj not None):
+    """Write a list as a YSON list, tagged with py=list."""
+    _c_pywriter_py_type(writer.c_writer, b'list')
+    # Same loop as _pywriter_write_iterable, inlined by hand so it is
+    # compiled against the concrete `list` type.
+    _c_writer_begin_list(writer.c_writer)
+    for element in obj:
+        writer.write(element)
+    _c_writer_end_list(writer.c_writer)
+
+
+@pywriter_handler(tuple)
+def _pywriter_write_tuple(PyWriter writer, tuple obj not None):
+    """Write a tuple as a YSON list, tagged with py=tuple."""
+    _c_pywriter_py_type(writer.c_writer, b'tuple')
+    # Same loop as _pywriter_write_iterable, inlined by hand so it is
+    # compiled against the concrete `tuple` type.
+    _c_writer_begin_list(writer.c_writer)
+    for element in obj:
+        writer.write(element)
+    _c_writer_end_list(writer.c_writer)
+
+
+@pywriter_handler(set)
+def _pywriter_write_set(PyWriter writer, set obj not None):
+    """Write a set as a YSON list, tagged with py=set."""
+    _c_pywriter_py_type(writer.c_writer, b'set')
+    _pywriter_write_iterable(writer, obj)
+
+
+@pywriter_handler(frozenset)
+def _pywriter_write_frozenset(PyWriter writer, frozenset obj not None):
+    """Write a frozenset as a YSON list, tagged with py=frozenset."""
+    _c_pywriter_py_type(writer.c_writer, b'frozenset')
+    _pywriter_write_iterable(writer, obj)
+
+
+@pywriter_handler(dict)
+def _pywriter_write_dict(PyWriter writer, dict obj not None):
+    """Write a dict as a YSON map when every key is bytes, otherwise as a
+    flat key/value list tagged with py=dict."""
+    cdef bint only_bytes_keys = True
+    cdef PyObject* key_ptr
+    cdef PyObject* value_ptr
+    cdef Py_ssize_t cursor = 0
+
+    # First pass: stop at the first key that is not bytes.
+    while PyDict_Next(obj, &cursor, &key_ptr, &value_ptr):
+        if not isinstance(<object>key_ptr, bytes):
+            only_bytes_keys = False
+            break
+
+    # Rewind the iteration position for the second pass.
+    cursor = 0
+
+    if not only_bytes_keys:
+        # Some keys are not bytes: fall back to a list of alternating
+        # keys and values.
+        _c_pywriter_py_type(writer.c_writer, b'dict')
+        _c_writer_begin_list(writer.c_writer)
+        while PyDict_Next(obj, &cursor, &key_ptr, &value_ptr):
+            writer.write(<object>key_ptr)
+            writer.write(<object>value_ptr)
+        _c_writer_end_list(writer.c_writer)
+    else:
+        # All keys are bytes: the native YSON map form can be used.
+        _c_writer_begin_map(writer.c_writer)
+        while PyDict_Next(obj, &cursor, &key_ptr, &value_ptr):
+            _c_writer_key_bytes(writer.c_writer, <bytes>key_ptr)
+            writer.write(<object>value_ptr)
+        _c_writer_end_map(writer.c_writer)
+
+
+# Simple API
+
+@cython.returns(bytes)
+def dumps(value, format=b'text', Writer not None=Writer):
+    r"""Convert an object to YSON node string.
+
+    :param value: Python object to convert.
+    :param format: YSON format to use: 'binary', 'text' or 'pretty'.
+    :param Writer: YSON Writer class, may be supplied for custom serialization policies.
+
+    >>> from cyson import dumps
+
+    >>> print dumps(1234)
+    1234
+    >>> print dumps("Hello world! Привет!")
+    "Hello world! Привет!"
+    >>> print dumps([1, "foo", None, {'aaa': 'bbb'}])
+    [1; "foo"; #; {"aaa" = "bbb"}]
+    >>> dumps([1, "foo", None, {'aaa': 'bbb'}], format='binary')
+    '[\x02\x02;\x01\x06foo;#;{\x01\x06aaa=\x01\x06bbb}]'
+    >>> print dumps([1, "foo", None, {'aaa': 'bbb'}], format='pretty')
+    [
+        1;
+        "foo";
+        #;
+        {
+            "aaa" = "bbb"
+        }
+    ]
+
+    """
+
+    # Serialise into a temporary bytearray, then copy the result to bytes.
+    buffer = PyByteArray_FromStringAndSize(NULL, 0)
+    output = OutputStream.from_bytearray(buffer, 200)
+    writer = Writer(output, format)
+
+    started = writer.begin_stream()
+    written = started.write(value)
+    written.end_stream()
+
+    return PyBytes_FromStringAndSize(
+        PyByteArray_AS_STRING(buffer), PyByteArray_GET_SIZE(buffer)
+    )
+
+
+def dumps_into(bytearray dest, value, format=b'text', Writer not None=Writer):
+    r"""Convert an object to a YSON node string, writing it into `dest`.
+
+    :param dest: Destination bytearray.
+    :param value: Python object to convert.
+    :param format: YSON format to use: 'binary', 'text' or 'pretty'.
+    :param Writer: YSON Writer class, may be supplied for custom serialization policies.
+
+    >>> dest = bytearray()
+    >>> dumps_into(dest, [1, "foo", None, {'aaa': 'bbb'}])
+    >>> dest
+    bytearray(b'[1; "foo"; #; {"aaa" = "bbb"}]')
+
+    """
+
+    output = OutputStream.from_bytearray(dest, 200)
+    writer = Writer(output, format)
+
+    started = writer.begin_stream()
+    written = started.write(value)
+    written.end_stream()
+
+
+@cython.returns(object)
+def loads(value, Reader not None=Reader):
+    r"""Convert a YSON node string to Python object.
+
+    :param value: YSON node string.
+    :param Reader: YSON Reader class, may be supplied for custom serialization policies.
+
+    >>> from cyson import loads
+
+    >>> loads('1234')
+    1234
+    >>> loads('3.14')
+    3.14
+    >>> loads('[1; "foo"; #; {"aaa" = "bbb"}]')
+    [1, 'foo', None, {'aaa': 'bbb'}]
+    >>> loads('[\x02\x02;\x01\x06foo;#;{\x01\x06aaa=\x01\x06bbb}]')
+    [1, 'foo', None, {'aaa': 'bbb'}]
+
+    """
+
+    source = InputStream.from_string(value)
+    return Reader(source).node()
+
+
+def list_fragments(InputStream stream not None,
+                   Reader not None=Reader,
+                   bint process_table_index=False,
+                   bint process_attributes=False,
+                   bint stop_at_key_switch=False,
+                   bint keep_control_records=False):
+    """Read `stream` in list-fragment mode and return the reader's
+    list-fragments iterator configured with the given flags."""
+
+    reader = Reader(stream, b'list_fragment')
+
+    # NOTE: kept for backward compatibility - asking for the table index
+    # implies attribute processing.
+    if process_table_index:
+        process_attributes = True
+
+    return reader.list_fragments(
+        process_attributes=process_attributes,
+        stop_at_key_switch=stop_at_key_switch,
+        keep_control_records=keep_control_records,
+    )
+
+
+cdef inline void exhaust_key_switched_iterator(ListFragmentIterator iterator):
+    # Consume the records left in the current group, unless the iterator is
+    # already at a key switch or at the end of the stream.
+    if iterator.is_key_switched or iterator.at_end:
+        return
+
+    for _ in iterator:
+        pass
+
+
+def _make_first_group_iterator(first_value, ListFragmentIterator iterator):
+    """Yield the already-fetched first record, then the rest of its group."""
+    yield first_value
+
+    # Nothing more belongs to this group once a key switch or the end of
+    # the stream has been reached.
+    if iterator.is_key_switched or iterator.at_end:
+        return
+
+    for record in iterator:
+        yield record
+
+
+def key_switched_list_fragments(
+    InputStream stream not None, Reader not None=Reader
+):
+    """Yield one iterator per group of records delimited by key switches.
+
+    The first group is wrapped so that it also yields the record that had to
+    be fetched up front; later groups are the shared fragment iterator
+    itself. Records a consumer leaves unread in a group are skipped before
+    the next group is yielded.
+    """
+    cdef ListFragmentIterator iterator = list_fragments(
+        stream,
+        Reader,
+        process_attributes=True,
+        stop_at_key_switch=True,
+        keep_control_records=False,
+    )
+
+    cdef PyObject* first_value = PyIter_Next(iterator)
+
+    if not first_value:
+        return
+
+    # The group generator keeps its own reference to the first record, so
+    # the reference owned by this function is released before yielding.
+    # Releasing it only after the yield leaked the record whenever the
+    # consumer abandoned this generator at its first yield.
+    try:
+        first_group = _make_first_group_iterator(<object>first_value, iterator)
+    finally:
+        Py_DECREF(<object>first_value)
+
+    yield first_group
+
+    # Do not keep the first group alive for the rest of the iteration.
+    first_group = None
+
+    # manually iterate over unused records in group
+    exhaust_key_switched_iterator(iterator)
+
+    while not iterator.at_end:
+        yield iterator
+
+        # manually iterate over unused records in group
+        exhaust_key_switched_iterator(iterator)
+
+
+def map_fragments(InputStream stream not None, Reader not None=Reader):
+    """Read `stream` in map-fragment mode and return the reader's map-fragments iterator."""
+    fragment_reader = Reader(stream, b'map_fragment')
+    return fragment_reader.map_fragments()
diff --git a/library/python/cyson/cyson/helpers.cpp b/library/python/cyson/cyson/helpers.cpp
new file mode 100644
index 0000000000..ae4a5dd2fa
--- /dev/null
+++ b/library/python/cyson/cyson/helpers.cpp
@@ -0,0 +1,179 @@
+#include "helpers.h"
+
+#include <util/system/compiler.h>
+
+namespace NCYson {
+    // Set a TypeError of the form "expected <expected>, got <type> (<repr>)"
+    // for `obj`. When repr() of the object fails, "<repr failed>" is used in
+    // place of the representation.
+    void SetPrettyTypeError(PyObject* obj, const char* expected) {
+#if PY_MAJOR_VERSION >= 3
+        PyObject* bytes_repr = nullptr;
+        PyObject* tmp = PyObject_Repr(obj);
+        if (Y_LIKELY(tmp)) {
+            bytes_repr = PyUnicode_AsUTF8String(tmp);
+            Py_DECREF(tmp);
+        }
+#else
+        PyObject* bytes_repr = PyObject_Repr(obj);
+#endif
+        // bytes_repr is null when repr() or the UTF-8 encoding failed; that
+        // case is handled below, so the type check must tolerate it instead
+        // of dereferencing a null pointer in debug builds.
+        assert(!bytes_repr || PyBytes_Check(bytes_repr));
+
+        PyErr_Format(
+            PyExc_TypeError,
+            "expected %s, got %s (%s)",
+            expected,
+            Py_TYPE(obj)->tp_name,
+            bytes_repr ? PyBytes_AS_STRING(bytes_repr) : "<repr failed>");
+
+        Py_XDECREF(bytes_repr);
+    }
+
+    // Return a new reference to a bytes object for `obj`: unicode objects are
+    // encoded as UTF-8, bytes objects are returned as they are. Any other
+    // type sets a TypeError and yields nullptr.
+    PyObject* ConvertPyStringToPyBytes(PyObject* obj) {
+        if (PyUnicode_Check(obj)) {
+            return PyUnicode_AsUTF8String(obj);
+        }
+
+        if (!PyBytes_Check(obj)) {
+            SetPrettyTypeError(obj, "bytes or unicode");
+            return nullptr;
+        }
+
+        Py_INCREF(obj);
+        return obj;
+    }
+
+// Helper for the Python 3 branches below: copy the pointer and length out of
+// the local `view` and release the buffer view straight away.
+#define FILL_DATA_FROM_BUFFER \
+ *data = (char*)view.buf; \
+ *size = (size_t)view.len; \
+ PyBuffer_Release(&view)
+
+ // Expose the character data of `obj` through `data`/`size` and return a new
+ // reference to the object that backs that data: the UTF-8 encoded copy for
+ // unicode input, `obj` itself otherwise. Returns nullptr on failure.
+ // NOTE(review): on Python 3 the buffer view is released before returning,
+ // so `data` presumably stays valid only while the returned owner is alive
+ // and unmodified - confirm for resizable exporters.
+ PyObject* GetCharBufferAndOwner(PyObject* obj, const char** data, size_t* size) {
+#if PY_MAJOR_VERSION >= 3
+ Py_buffer view;
+#endif
+
+ if (PyUnicode_Check(obj)) {
+ PyObject* encoded = PyUnicode_AsUTF8String(obj);
+ if (!encoded) {
+ return nullptr;
+ }
+
+#if PY_MAJOR_VERSION >= 3
+ if (PyObject_GetBuffer(encoded, &view, PyBUF_SIMPLE) < 0) {
+#else
+ if (PyObject_AsCharBuffer(encoded, data, (Py_ssize_t*)size) < 0) {
+#endif
+ Py_DECREF(encoded);
+ return nullptr;
+ }
+
+#if PY_MAJOR_VERSION >= 3
+ FILL_DATA_FROM_BUFFER;
+#endif
+
+ return encoded;
+ }
+
+#if PY_MAJOR_VERSION >= 3
+ if (PyObject_GetBuffer(obj, &view, PyBUF_SIMPLE) < 0) {
+#else
+ if (PyObject_AsCharBuffer(obj, data, (Py_ssize_t*)size) < 0) {
+#endif
+ return nullptr;
+ }
+
+#if PY_MAJOR_VERSION >= 3
+ FILL_DATA_FROM_BUFFER;
+#endif
+
+ // The caller receives its own reference to the original object.
+ Py_INCREF(obj);
+
+ return obj;
+ }
+
+#undef FILL_DATA_FROM_BUFFER
+
+    // Return a new reference to the interpreter's native `str` for `obj`:
+    // unicode on Python 3 (bytes are decoded as UTF-8), bytes on Python 2
+    // (unicode is encoded as UTF-8). Any other type sets a TypeError and
+    // yields nullptr.
+    PyObject* ConvertPyStringToPyNativeString(PyObject* obj) {
+        const bool isBytes = PyBytes_Check(obj);
+        const bool isUnicode = PyUnicode_Check(obj);
+
+        if (!isBytes && !isUnicode) {
+            SetPrettyTypeError(obj, "bytes or unicode");
+            return nullptr;
+        }
+
+#if PY_MAJOR_VERSION >=3
+        if (isBytes) {
+            return PyUnicode_DecodeUTF8(PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj), nullptr);
+        }
+#else
+        if (!isBytes) {
+            return PyUnicode_AsUTF8String(obj);
+        }
+#endif
+
+        // Already the native string type: hand out a new reference.
+        Py_INCREF(obj);
+        return obj;
+    }
+
+    // Return a new bytes object holding the decimal representation of the
+    // integer `obj`. Sets a TypeError and returns nullptr when `obj` is not
+    // a long.
+    PyObject* ConvertPyLongToPyBytes(PyObject* obj) {
+        PyObject* result;
+
+        if (!PyLong_Check(obj)) {
+            SetPrettyTypeError(obj, "long");
+            return nullptr;
+        }
+
+#if PY_MAJOR_VERSION >= 3
+        // PyNumber_ToBase is the public counterpart of the private
+        // _PyLong_Format helper, so this does not depend on CPython
+        // internals.
+        PyObject* tmp = PyNumber_ToBase(obj, 10);
+        if (!tmp) {
+            return nullptr;
+        }
+
+        result = PyUnicode_AsUTF8String(tmp);
+
+        Py_DECREF(tmp);
+#else
+        result = _PyLong_Format(obj, 10, 0, 0);
+#endif
+        return result;
+    }
+
+    namespace NPrivate {
+        // Start out empty.
+        TPyObjectPtr::TPyObjectPtr() {
+            Ptr_ = nullptr;
+        }
+
+        // Replace the held object with `ptr` (which may be null). The new
+        // object is retained before the old one is released, so resetting
+        // to the object already held is safe.
+        void TPyObjectPtr::Reset(PyObject* ptr) {
+            PyObject* tmp = Ptr_;
+            Py_XINCREF(ptr);
+            Ptr_ = ptr;
+            Py_XDECREF(tmp);
+        }
+
+        // Return the held object as a new reference (or nullptr).
+        PyObject* TPyObjectPtr::GetNew() {
+            Py_XINCREF(Ptr_);
+            return Ptr_;
+        }
+
+        // Return the held object without touching its reference count.
+        PyObject* TPyObjectPtr::GetBorrowed() {
+            return Ptr_;
+        }
+
+        // Release the held object, unless the Python 3 interpreter is
+        // already finalizing, in which case the reference is left alone.
+        TPyObjectPtr::~TPyObjectPtr() {
+#if PY_VERSION_HEX >= 0x030D00A1
+            // Python 3.13 made the check public under this name.
+            if (Py_IsFinalizing()) {
+                return;
+            }
+#elif PY_VERSION_HEX >= 0x03070000
+            if (_Py_IsFinalizing()) {
+                return;
+            }
+#elif PY_MAJOR_VERSION >= 3
+            // https://github.com/python/cpython/blob/3.6/Python/sysmodule.c#L1345
+            if (_Py_Finalizing != NULL) {
+                return;
+            }
+#endif
+            PyObject* tmp = Ptr_;
+            Ptr_ = nullptr;
+            Py_XDECREF(tmp);
+        }
+    }
+}
diff --git a/library/python/cyson/cyson/helpers.h b/library/python/cyson/cyson/helpers.h
new file mode 100644
index 0000000000..326e6ffbe1
--- /dev/null
+++ b/library/python/cyson/cyson/helpers.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include <Python.h>
+
+namespace NCYson {
+ // Compile-time flags for the major Python version being built against.
+ constexpr bool PY3 = PY_MAJOR_VERSION == 3;
+ constexpr bool PY2 = PY_MAJOR_VERSION == 2;
+
+ // Helpers implemented in helpers.cpp.
+ void SetPrettyTypeError(PyObject*, const char*);
+ PyObject* ConvertPyStringToPyBytes(PyObject*);
+ PyObject* GetCharBufferAndOwner(PyObject*, const char**, size_t*);
+ PyObject* ConvertPyStringToPyNativeString(PyObject*);
+ PyObject* ConvertPyLongToPyBytes(PyObject*);
+
+ // Return `self` as a new reference.
+ inline PyObject* GetSelf(PyObject* self) {
+ Py_INCREF(self);
+ return self;
+ }
+
+ namespace NPrivate {
+ // Holder for a single PyObject pointer; the member functions are
+ // defined in helpers.cpp.
+ class TPyObjectPtr {
+ public:
+ void Reset(PyObject*);
+ PyObject* GetNew();
+ PyObject* GetBorrowed();
+
+ TPyObjectPtr();
+ ~TPyObjectPtr();
+
+ private:
+ PyObject* Ptr_;
+ };
+ }
+}
+
+// Compatibility shims: pick the buffer-check function per Python major
+// version and, on Python 3, stub out the file-object helpers (the check is
+// always false and the accessor sets NotImplementedError).
+#if PY_MAJOR_VERSION >= 3
+#define GenericCheckBuffer PyObject_CheckBuffer
+#define PyFile_CheckExact(x) 0
+#define PyFile_AsFile(x) (FILE*)(PyErr_Format(PyExc_NotImplementedError, "PyFile_AsFile not implemented for Python3"))
+#else
+#define GenericCheckBuffer PyObject_CheckReadBuffer
+#endif
+
+// Provide Py_SET_SIZE for Python versions whose headers do not define it.
+#if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_SIZE)
+static inline void _Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size)
+{ ob->ob_size = size; }
+#define Py_SET_SIZE(ob, size) _Py_SET_SIZE((PyVarObject*)(ob), size)
+#endif
diff --git a/library/python/cyson/cyson/libcyson.pxd b/library/python/cyson/cyson/libcyson.pxd
new file mode 100644
index 0000000000..091870e4bc
--- /dev/null
+++ b/library/python/cyson/cyson/libcyson.pxd
@@ -0,0 +1,116 @@
+from libc.stddef cimport size_t
+from libc.stdint cimport uint64_t, int64_t
+from libc.stdio cimport FILE
+
+# Cython declarations for the C API exposed by <library/c/cyson/cyson.h>:
+# opaque stream/reader/writer handles, the event, scalar and result enums,
+# and the functions that create, drive and destroy readers and writers.
+cdef extern from "<library/c/cyson/cyson.h>":
+ struct yson_input_stream:
+ pass
+ struct yson_output_stream:
+ pass
+
+ struct yson_reader:
+ pass
+ struct yson_writer:
+ pass
+
+ struct yson_string:
+ const char* ptr
+ size_t length
+
+ enum yson_event_type:
+ YSON_EVENT_BEGIN_STREAM
+ YSON_EVENT_END_STREAM
+ YSON_EVENT_BEGIN_LIST
+ YSON_EVENT_END_LIST
+ YSON_EVENT_BEGIN_MAP
+ YSON_EVENT_END_MAP
+ YSON_EVENT_BEGIN_ATTRIBUTES
+ YSON_EVENT_END_ATTRIBUTES
+ YSON_EVENT_KEY
+ YSON_EVENT_SCALAR
+ YSON_EVENT_ERROR
+
+ enum yson_scalar_type:
+ YSON_SCALAR_ENTITY
+ YSON_SCALAR_BOOLEAN
+ YSON_SCALAR_INT64
+ YSON_SCALAR_UINT64
+ YSON_SCALAR_FLOAT64
+ YSON_SCALAR_STRING
+
+ enum yson_input_stream_result:
+ YSON_INPUT_STREAM_RESULT_OK
+ YSON_INPUT_STREAM_RESULT_EOF
+ YSON_INPUT_STREAM_RESULT_ERROR
+
+ enum yson_output_stream_result:
+ YSON_OUTPUT_STREAM_RESULT_OK
+ YSON_OUTPUT_STREAM_RESULT_ERROR
+
+ enum yson_writer_result:
+ YSON_WRITER_RESULT_OK
+ YSON_WRITER_RESULT_BAD_STREAM
+ YSON_WRITER_RESULT_ERROR
+
+ enum yson_stream_type:
+ YSON_STREAM_TYPE_NODE
+ YSON_STREAM_TYPE_LIST_FRAGMENT
+ YSON_STREAM_TYPE_MAP_FRAGMENT
+
+ # Stream callbacks: the `except` clauses make the ERROR result the value
+ # that signals a raised Python exception.
+ ctypedef yson_input_stream_result (*yson_input_stream_func)(
+ void* ctx,
+ const char** ptr,
+ size_t* length) except YSON_INPUT_STREAM_RESULT_ERROR
+
+ ctypedef yson_output_stream_result (*yson_output_stream_func)(
+ void* ctx,
+ const char* ptr,
+ size_t length) except YSON_OUTPUT_STREAM_RESULT_ERROR
+
+ yson_input_stream* yson_input_stream_from_string(const char* ptr, size_t length)
+ yson_input_stream* yson_input_stream_from_file(FILE* file, size_t buffer_size)
+ yson_input_stream* yson_input_stream_from_fd(int fd, size_t buffer_size)
+ yson_input_stream* yson_input_stream_new(void* ctx, yson_input_stream_func callback);
+ void yson_input_stream_delete(yson_input_stream* stream)
+
+ yson_output_stream* yson_output_stream_from_file(FILE* file, size_t buffer_size)
+ yson_output_stream* yson_output_stream_from_fd(int fd, size_t buffer_size)
+ yson_output_stream* yson_output_stream_new(void* ctx, yson_output_stream_func callback, size_t buffer_size);
+ void yson_output_stream_delete(yson_output_stream* stream)
+
+ yson_reader* yson_reader_new(yson_input_stream* stream, yson_stream_type mode)
+ void yson_reader_delete(yson_reader* reader)
+ const char* yson_reader_get_error_message(yson_reader* reader)
+
+ # `except?`: YSON_EVENT_ERROR may indicate a pending Python exception,
+ # so Cython checks for one whenever that value is returned.
+ yson_event_type yson_reader_get_next_event(yson_reader* reader) except? YSON_EVENT_ERROR
+ yson_scalar_type yson_reader_get_scalar_type(yson_reader* reader)
+
+ bint yson_reader_get_boolean(yson_reader* reader)
+ int64_t yson_reader_get_int64(yson_reader* reader)
+ uint64_t yson_reader_get_uint64(yson_reader* reader)
+ double yson_reader_get_float64(yson_reader* reader)
+ const yson_string* yson_reader_get_string(yson_reader* reader)
+
+ yson_writer* yson_writer_new_binary(yson_output_stream* stream, yson_stream_type mode)
+ yson_writer* yson_writer_new_text(yson_output_stream* stream, yson_stream_type mode)
+ yson_writer* yson_writer_new_pretty_text(yson_output_stream* stream, yson_stream_type mode, size_t indent)
+ void yson_writer_delete(yson_writer* writer)
+ const char* yson_writer_get_error_message(yson_writer* writer)
+
+ yson_writer_result yson_writer_write_begin_stream(yson_writer* writer)
+ yson_writer_result yson_writer_write_end_stream(yson_writer* writer)
+ yson_writer_result yson_writer_write_begin_list(yson_writer* writer)
+ yson_writer_result yson_writer_write_end_list(yson_writer* writer)
+ yson_writer_result yson_writer_write_begin_map(yson_writer* writer)
+ yson_writer_result yson_writer_write_end_map(yson_writer* writer)
+ yson_writer_result yson_writer_write_begin_attributes(yson_writer* writer)
+ yson_writer_result yson_writer_write_end_attributes(yson_writer* writer)
+
+ yson_writer_result yson_writer_write_entity(yson_writer* writer)
+ yson_writer_result yson_writer_write_key(yson_writer* writer, const char* ptr, size_t length)
+ yson_writer_result yson_writer_write_string(yson_writer* writer, const char* ptr, size_t length)
+ yson_writer_result yson_writer_write_int64(yson_writer* writer, int64_t value)
+ yson_writer_result yson_writer_write_uint64(yson_writer* writer, uint64_t value)
+ yson_writer_result yson_writer_write_boolean(yson_writer* writer, int value)
+ yson_writer_result yson_writer_write_float64(yson_writer* writer, double value)
+
diff --git a/library/python/cyson/cyson/unsigned_long.cpp b/library/python/cyson/cyson/unsigned_long.cpp
new file mode 100644
index 0000000000..f20b3f106e
--- /dev/null
+++ b/library/python/cyson/cyson/unsigned_long.cpp
@@ -0,0 +1,292 @@
+#include "unsigned_long.h"
+
+#include "helpers.h"
+
+#include <util/generic/va_args.h>
+#include <util/system/compiler.h>
+
+#if (PY_MAJOR_VERSION == 2)
+#include <longintrepr.h>
+#endif
+
+#if (PY_MAJOR_VERSION >= 3) && defined(Py_LIMITED_API)
+#error "limited API for Python3 not supported yet"
+#endif
+
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
+
+#ifndef Py_TPFLAGS_CHECKTYPES
+#define Py_TPFLAGS_CHECKTYPES 0
+#endif
+
+namespace NCYson {
+ static NPrivate::TPyObjectPtr PyUnsignedLong_ReprPtr;
+
+    // Set an OverflowError saying that the negative integer `obj` cannot be
+    // converted to `type`. "???" stands in for the value when its decimal
+    // text cannot be produced.
+    static void SetNegativeValueError(PyObject* obj, PyTypeObject* type) {
+        assert(PyLong_Check(obj));
+        assert(Py_SIZE(obj) < 0);
+
+        PyObject* repr = ConvertPyLongToPyBytes(obj);
+
+        const char* shown = "???";
+        if (Y_LIKELY(repr)) {
+            shown = PyBytes_AS_STRING(repr);
+        }
+
+        PyErr_Format(
+            PyExc_OverflowError,
+            "cannot convert negative value (%s) to %s",
+            shown,
+            type->tp_name);
+
+        Py_XDECREF(repr);
+    }
+
+    // Finalize the UInt type object, install `reprfunc` as the callable used
+    // by its repr slot (may be null), and return a new reference to the
+    // type. Returns nullptr when PyType_Ready fails.
+    PyObject* PreparePyUIntType(PyObject* reprfunc) {
+        const int ready = PyType_Ready(&PyUnsignedLong_Type);
+        if (Y_UNLIKELY(ready < 0)) {
+            return nullptr;
+        }
+
+        PyUnsignedLong_ReprPtr.Reset(reprfunc);
+
+        PyObject* typeObject = (PyObject*)&PyUnsignedLong_Type;
+        Py_INCREF(typeObject);
+        return typeObject;
+    }
+
+ // Build a new UInt object holding the same value as the non-negative
+ // integer `obj` by copying its digits. Sets OverflowError and returns
+ // nullptr for a negative input; returns nullptr when allocation fails.
+ // NOTE(review): reads Py_SIZE and ob_digit directly, i.e. relies on the
+ // integer object layout - confirm against the supported CPython versions.
+ PyObject* ConstructPyUIntFromPyLong(PyLongObject* obj) {
+ PyObject *result;
+ Py_ssize_t index;
+ Py_ssize_t size;
+
+ assert(PyLong_Check(obj));
+
+ // A negative size marks a negative value here.
+ size = Py_SIZE(obj);
+ if (size < 0) {
+ SetNegativeValueError((PyObject*)obj, &PyUnsignedLong_Type);
+ return nullptr;
+ }
+
+ // Allocate with the base type's allocator, but as an instance of UInt.
+ result = PyLong_Type.tp_alloc(&PyUnsignedLong_Type, size);
+ if (Y_UNLIKELY(!result)) {
+ return nullptr;
+ }
+
+ assert(IsExactPyUInt(result));
+
+ Py_SET_SIZE(result, size);
+
+ for (index = 0; index < size; ++index) {
+ ((PyLongObject*)result)->ob_digit[index] = obj->ob_digit[index];
+ }
+
+ return result;
+ }
+
+    // Build a new UInt object from an unsigned 64-bit value. Returns nullptr
+    // with an exception set on failure.
+    PyObject* ConstructPyUIntFromUint(uint64_t n) {
+        // `unsigned long` is only 32 bits wide on some platforms, so the
+        // `unsigned long long` constructor is needed to keep all 64 bits.
+        PyObject* tmp = PyLong_FromUnsignedLongLong(n);
+        if (Y_UNLIKELY(!tmp)) {
+            return nullptr;
+        }
+
+        PyObject* result = ConstructPyUIntFromPyLong((PyLongObject*)tmp);
+
+        Py_DECREF(tmp);
+
+        return result;
+    }
+
+    // tp_new for UInt: delegate construction to the base integer type, then
+    // reject negative results with an OverflowError.
+    static PyObject* unsigned_long_new(PyTypeObject *type, PyObject *args, PyObject* kws) {
+        PyObject* created = PyLong_Type.tp_new(type, args, kws);
+        if (Y_UNLIKELY(!created)) {
+            return nullptr;
+        }
+
+        assert(IsExactPyUInt(created));
+
+        if (Py_SIZE(created) >= 0) {
+            return created;
+        }
+
+        SetNegativeValueError(created, type);
+        Py_DECREF(created);
+        return nullptr;
+    }
+
+    // tp_repr for UInt: call the repr callable installed through
+    // PreparePyUIntType when there is one, otherwise use the base integer
+    // representation.
+    static PyObject* unsigned_long_repr(PyObject* self) {
+        PyObject* callable = PyUnsignedLong_ReprPtr.GetBorrowed();
+
+        if (callable) {
+            return PyObject_CallFunctionObjArgs(callable, self, nullptr);
+        }
+
+        // Call the base type's slot directly: PyObject_Repr(self) would
+        // dispatch through this type's tp_repr, i.e. back into this very
+        // function, and recurse without bound.
+        return PyLong_Type.tp_repr(self);
+    }
+
+// Generators for the arithmetic slots of UInt. Each generated function
+// forwards to the matching slot of the base integer type and, when the
+// result is an exact non-negative integer, re-wraps it as a UInt; any other
+// result (including nullptr) is returned unchanged.
+#define PYOBJECT_ARG(o) PyObject* o,
+#define PYOBJECT_ARG_LAST(o) PyObject* o
+
+#define UNSIGNED_LONG_OPERATION(SLOT, ...) \
+ static PyObject* unsigned_long_##SLOT(Y_MAP_ARGS_WITH_LAST(PYOBJECT_ARG, PYOBJECT_ARG_LAST, __VA_ARGS__)) { \
+ PyObject* result = PyLong_Type.tp_as_number->nb_##SLOT(__VA_ARGS__); \
+ if (result && PyLong_CheckExact(result) && (Py_SIZE(result) >= 0)) { \
+ PyObject* tmp = result; \
+ result = ConstructPyUIntFromPyLong((PyLongObject*)tmp); \
+ Py_DECREF(tmp); \
+ } \
+ return result; \
+ }
+
+ UNSIGNED_LONG_OPERATION(add, x, y);
+ UNSIGNED_LONG_OPERATION(subtract, x, y);
+ UNSIGNED_LONG_OPERATION(multiply, x, y);
+#if PY_MAJOR_VERSION < 3
+ UNSIGNED_LONG_OPERATION(divide, x, y);
+#endif
+ UNSIGNED_LONG_OPERATION(remainder, x, y);
+ UNSIGNED_LONG_OPERATION(power, x, y, z);
+ UNSIGNED_LONG_OPERATION(lshift, x, y);
+ UNSIGNED_LONG_OPERATION(rshift, x, y);
+ UNSIGNED_LONG_OPERATION(and, x, y);
+ UNSIGNED_LONG_OPERATION(xor, x, y);
+ UNSIGNED_LONG_OPERATION(or, x, y);
+ UNSIGNED_LONG_OPERATION(floor_divide, x, y);
+ UNSIGNED_LONG_OPERATION(true_divide, x, y);
+
+#undef UNSIGNED_LONG_OPERATION
+#undef PYOBJECT_ARG_LAST
+#undef PYOBJECT_ARG
+
+ // Number protocol table for UInt. Only the slots that must re-wrap their
+ // result (or return self) are filled in; the rest are left as 0. Entries
+ // are positional, so the #if blocks mirror the fields that exist in each
+ // Python version's PyNumberMethods.
+ static PyNumberMethods unsigned_long_as_number = {
+ unsigned_long_add, /*nb_add*/
+ unsigned_long_subtract, /*nb_subtract*/
+ unsigned_long_multiply, /*nb_multiply*/
+#if PY_MAJOR_VERSION < 3
+ unsigned_long_divide, /*nb_divide*/
+#endif
+ unsigned_long_remainder, /*nb_remainder*/
+ 0, /*nb_divmod*/
+ unsigned_long_power, /*nb_power*/
+ 0, /*nb_negative*/
+ GetSelf, /*nb_positive*/
+ GetSelf, /*nb_absolute*/
+ 0, /*nb_nonzero*/
+ 0, /*nb_invert*/
+ unsigned_long_lshift, /*nb_lshift*/
+ unsigned_long_rshift, /*nb_rshift*/
+ unsigned_long_and, /*nb_and*/
+ unsigned_long_xor, /*nb_xor*/
+ unsigned_long_or, /*nb_or*/
+#if PY_MAJOR_VERSION < 3
+ 0, /*nb_coerce*/
+#endif
+ 0, /*nb_int*/
+#if PY_MAJOR_VERSION < 3
+ 0, /*nb_long*/
+#else
+ 0, /*reserved*/
+#endif
+ 0, /*nb_float*/
+#if PY_MAJOR_VERSION < 3
+ 0, /*nb_oct*/
+ 0, /*nb_hex*/
+#endif
+ 0, /*nb_inplace_add*/
+ 0, /*nb_inplace_subtract*/
+ 0, /*nb_inplace_multiply*/
+#if PY_MAJOR_VERSION < 3
+ 0, /*nb_inplace_divide*/
+#endif
+ 0, /*nb_inplace_remainder*/
+ 0, /*nb_inplace_power*/
+ 0, /*nb_inplace_lshift*/
+ 0, /*nb_inplace_rshift*/
+ 0, /*nb_inplace_and*/
+ 0, /*nb_inplace_xor*/
+ 0, /*nb_inplace_or*/
+ unsigned_long_floor_divide, /*nb_floor_divide*/
+ unsigned_long_true_divide, /*nb_true_divide*/
+ 0, /*nb_inplace_floor_divide*/
+ 0, /*nb_inplace_true_divide*/
+ 0, /*nb_index*/
+#if PY_VERSION_HEX >= 0x03050000
+ 0, /*nb_matrix_multiply*/
+ 0, /*nb_inplace_matrix_multiply*/
+#endif
+ };
+
+ // Type object for "cyson._cyson.UInt": a subclass of the builtin integer
+ // type (tp_base) that reuses its size and deallocator and overrides repr,
+ // the number protocol table and tp_new. Entries are positional, so the
+ // #if blocks mirror the fields that exist in each Python version.
+ PyTypeObject PyUnsignedLong_Type = {
+ PyVarObject_HEAD_INIT(0, 0)
+ "cyson._cyson.UInt", /*tp_name*/
+ PyLong_Type.tp_basicsize, /*tp_basicsize*/
+ PyLong_Type.tp_itemsize, /*tp_itemsize*/
+ PyLong_Type.tp_dealloc, /*tp_dealloc*/
+#if PY_VERSION_HEX < 0x030800b4
+ 0, /*tp_print*/
+#endif
+#if PY_VERSION_HEX >= 0x030800b4
+ 0, /*tp_vectorcall_offset*/
+#endif
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+#if PY_MAJOR_VERSION < 3
+ 0, /*tp_compare*/
+#endif
+#if PY_MAJOR_VERSION >= 3
+ 0, /*tp_as_async*/
+#endif
+ unsigned_long_repr, /*tp_repr*/
+ &unsigned_long_as_number, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash*/
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_LONG_SUBCLASS, /*tp_flags*/
+ "UInt(0) -> UInt\nUInt(x, base=10) -> UInt", /*tp_doc*/
+ 0, /*tp_traverse*/
+ 0, /*tp_clear*/
+ 0, /*tp_richcompare*/
+ 0, /*tp_weaklistoffset*/
+ 0, /*tp_iter*/
+ 0, /*tp_iternext*/
+ 0, /*tp_methods*/
+ 0, /*tp_members*/
+ 0, /*tp_getset*/
+ &PyLong_Type, /*tp_base*/
+ 0, /*tp_dict*/
+ 0, /*tp_descr_get*/
+ 0, /*tp_descr_set*/
+ 0, /*tp_dictoffset*/
+ 0, /*tp_init*/
+ 0, /*tp_alloc*/
+ unsigned_long_new, /*tp_new*/
+ 0, /*tp_free*/
+ 0, /*tp_is_gc*/
+ 0, /*tp_bases*/
+ 0, /*tp_mro*/
+ 0, /*tp_cache*/
+ 0, /*tp_subclasses*/
+ 0, /*tp_weaklist*/
+ 0, /*tp_del*/
+ 0, /*tp_version_tag*/
+#if PY_VERSION_HEX >= 0x030400a1
+ 0, /*tp_finalize*/
+#endif
+#if PY_VERSION_HEX >= 0x030800b1
+ 0, /*tp_vectorcall*/
+#endif
+#if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000
+ 0, /*tp_print*/
+#endif
+ };
+}
diff --git a/library/python/cyson/cyson/unsigned_long.h b/library/python/cyson/cyson/unsigned_long.h
new file mode 100644
index 0000000000..8f8c8c1da7
--- /dev/null
+++ b/library/python/cyson/cyson/unsigned_long.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <Python.h>
+
+#include <stdint.h>
+
+namespace NCYson {
+    // Type object for the unsigned integer Python type (defined in unsigned_long.cpp).
+    extern PyTypeObject PyUnsignedLong_Type;
+
+    // Declared here, defined elsewhere. `repr` is optional and defaults to nullptr.
+    PyObject* PreparePyUIntType(PyObject* repr = nullptr);
+    PyObject* ConstructPyUIntFromPyLong(PyLongObject*);
+    PyObject* ConstructPyUIntFromUint(uint64_t);
+
+    // Returns non-zero only when `obj` is exactly of type PyUnsignedLong_Type;
+    // the type pointer is compared directly, so subclasses do not match.
+    inline int IsExactPyUInt(PyObject* obj) {
+        return Py_TYPE(obj) == &PyUnsignedLong_Type;
+    }
+
+    // Builds the Python number object for an unsigned 64-bit value:
+    // delegates to ConstructPyUIntFromUint on Python 3 and creates a plain
+    // Python `long` on Python 2.
+#if PY_MAJOR_VERSION >= 3
+    inline PyObject* ConstructPyNumberFromUint(uint64_t n) {
+        return ConstructPyUIntFromUint(n);
+    }
+#else
+    inline PyObject* ConstructPyNumberFromUint(uint64_t n) {
+        // `unsigned long` is only 32 bits wide on some platforms (e.g. 64-bit
+        // Windows), so PyLong_FromUnsignedLong would silently truncate `n`.
+        return PyLong_FromUnsignedLongLong(static_cast<unsigned long long>(n));
+    }
+#endif
+}
diff --git a/library/python/cyson/pymodule/ya.make b/library/python/cyson/pymodule/ya.make
new file mode 100644
index 0000000000..dd308bd8a0
--- /dev/null
+++ b/library/python/cyson/pymodule/ya.make
@@ -0,0 +1,21 @@
+# Builds `_cyson` as a standalone Python extension module from the Cython
+# source and the C++ helpers located in library/python/cyson/cyson.
+PY_ANY_MODULE(_cyson)
+
+# Select the Python 3 or Python 2 module flavour from the configured interpreter.
+IF (PYTHON_CONFIG MATCHES "python3" OR USE_SYSTEM_PYTHON MATCHES "3.")
+ PYTHON3_MODULE()
+ELSE()
+ PYTHON2_MODULE()
+ENDIF()
+
+NO_WSHADOW()
+
+PEERDIR(
+ library/c/cyson
+)
+
+# Sources are referenced by their repository-root-relative paths.
+SRCS(
+ library/python/cyson/cyson/_cyson.pyx
+ library/python/cyson/cyson/helpers.cpp
+ library/python/cyson/cyson/unsigned_long.cpp
+)
+
+END()
diff --git a/library/python/cyson/ut/test_control_attributes.py b/library/python/cyson/ut/test_control_attributes.py
new file mode 100644
index 0000000000..221542b12d
--- /dev/null
+++ b/library/python/cyson/ut/test_control_attributes.py
@@ -0,0 +1,258 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import print_function, absolute_import, division
+
+import itertools
+from functools import partial
+
+import pytest
+import six
+
+from cyson import (
+ YsonEntity, InputStream,
+ list_fragments, key_switched_list_fragments,
+ Reader, UnicodeReader
+)
+
+
+def filter_control_records(list):
+ return [
+ _ for _ in list
+ if not isinstance(_[2], YsonEntity)
+ ]
+
+
+def canonize(val, as_unicode):
+ _canonize = partial(canonize, as_unicode=as_unicode)
+
+ if isinstance(val, six.binary_type) and as_unicode:
+ return val.decode('utf8')
+ elif isinstance(val, six.text_type) and not as_unicode:
+ return val.encode('utf8')
+ elif isinstance(val, (list, tuple)):
+ return [_canonize(elem) for elem in val]
+ elif isinstance(val, dict):
+ return {_canonize(k): _canonize(v) for k, v in val.items()}
+ return val
+
+
+@pytest.mark.parametrize(
+ 'reader, as_unicode', [
+ [Reader, False],
+ [UnicodeReader, True],
+ ],
+)
+@pytest.mark.parametrize(
+ 'keep_control_records', [True, False]
+)
+def test_row_index(keep_control_records, reader, as_unicode):
+ _ = partial(canonize, as_unicode=as_unicode)
+
+ data = b"""
+ <row_index=0>#;
+ {a=1;b=2};
+ {a=2;b=3};
+ {a=3;b=4};
+ <row_index=10000>#;
+ {a=-1;b=-1};
+ {a=-2;b=-2};
+ """
+
+ iter = list_fragments(
+ stream=InputStream.from_string(data),
+ Reader=reader,
+ process_attributes=True,
+ keep_control_records=keep_control_records,
+ )
+ records = [(iter.range_index, iter.row_index, __) for __ in iter]
+
+ etalon = [
+ (None, -1, YsonEntity(attributes={b'row_index': 0})),
+ (None, 0, _({b'a': 1, b'b': 2})),
+ (None, 1, _({b'a': 2, b'b': 3})),
+ (None, 2, _({b'a': 3, b'b': 4})),
+ (None, 9999, YsonEntity(attributes={b'row_index': 10000})),
+ (None, 10000, _({b'a': -1, b'b': -1})),
+ (None, 10001, _({b'a': -2, b'b': -2})),
+ ]
+
+ if not keep_control_records:
+ etalon = filter_control_records(etalon)
+
+ assert records == etalon
+
+
+@pytest.mark.parametrize(
+ 'reader, as_unicode', [
+ [Reader, False],
+ [UnicodeReader, True],
+ ]
+)
+@pytest.mark.parametrize(
+ 'keep_control_records', [True, False],
+)
+@pytest.mark.parametrize(
+ 'parameter_name',
+ ['process_attributes', 'process_table_index']
+)
+def test_range_index(parameter_name, keep_control_records, reader, as_unicode):
+ _ = partial(canonize, as_unicode=as_unicode)
+
+ data = b"""
+ <range_index=2; row_index=0>#;
+ {a=1;b=2};
+ {a=2;b=3};
+ {a=3;b=4};
+ <range_index=0; row_index=10000>#;
+ {a=-1;b=-1};
+ {a=-2;b=-2};
+ """
+
+ iter = list_fragments(
+ stream=InputStream.from_string(data),
+ Reader=reader,
+ **{parameter_name: True, 'keep_control_records': keep_control_records}
+ )
+ records = [(iter.range_index, iter.row_index, __) for __ in iter]
+
+ etalon = [
+ (2, -1, YsonEntity(attributes={b'range_index': 2, b'row_index': 0})),
+ (2, 0, _({b'a': 1, b'b': 2})),
+ (2, 1, _({b'a': 2, b'b': 3})),
+ (2, 2, _({b'a': 3, b'b': 4})),
+ (0, 9999, YsonEntity(attributes={b'range_index': 0, b'row_index': 10000})),
+ (0, 10000, _({b'a': -1, b'b': -1})),
+ (0, 10001, _({b'a': -2, b'b': -2})),
+ ]
+
+ if not keep_control_records:
+ etalon = filter_control_records(etalon)
+
+ assert records == etalon
+
+
+@pytest.mark.parametrize(
+ 'reader, as_unicode', [
+ [Reader, False],
+ [UnicodeReader, True],
+ ]
+)
+def test_key_switch_first(reader, as_unicode):
+ _ = partial(canonize, as_unicode=as_unicode)
+
+ data = b"""
+ <key_switch=True>#;
+ {k=1;a=1;b=2};
+ {k=1;a=2;b=3};
+ {k=1;a=3;b=4};
+ <key_switch=True>#;
+ {k=2;a=-1;b=-1};
+ {k=2;a=-2;b=-2};
+ """
+
+ iter = key_switched_list_fragments(
+ stream=InputStream.from_string(data),
+ Reader=reader,
+ )
+ records = [list(__) for __ in iter]
+
+ assert records == [
+ [
+ _({b'k': 1, b'a': 1, b'b': 2}),
+ _({b'k': 1, b'a': 2, b'b': 3}),
+ _({b'k': 1, b'a': 3, b'b': 4}),
+ ],
+ [
+ _({b'k': 2, b'a': -1, b'b': -1}),
+ _({b'k': 2, b'a': -2, b'b': -2}),
+ ]
+ ]
+
+
+@pytest.mark.parametrize(
+ 'reader, as_unicode', [
+ [Reader, False],
+ [UnicodeReader, True],
+ ]
+)
+def test_key_switch_nofirst(reader, as_unicode):
+ _ = partial(canonize, as_unicode=as_unicode)
+
+ data = b"""
+ {k=1;a=1;b=2};
+ {k=1;a=2;b=3};
+ {k=1;a=3;b=4};
+ <key_switch=True>#;
+ {k=2;a=-1;b=-1};
+ {k=2;a=-2;b=-2};
+ """
+
+ iter = key_switched_list_fragments(
+ stream=InputStream.from_string(data),
+ Reader=reader
+ )
+ records = [list(__) for __ in iter]
+
+ assert records == [
+ [
+ _({b'k': 1, b'a': 1, b'b': 2}),
+ _({b'k': 1, b'a': 2, b'b': 3}),
+ _({b'k': 1, b'a': 3, b'b': 4}),
+ ],
+ [
+ _({b'k': 2, b'a': -1, b'b': -1}),
+ _({b'k': 2, b'a': -2, b'b': -2}),
+ ]
+ ]
+
+
+@pytest.mark.parametrize(
+ 'reader, as_unicode', [
+ [Reader, False],
+ [UnicodeReader, True],
+ ]
+)
+def test_key_switch_exhaust_unused_records(reader, as_unicode):
+ _ = partial(canonize, as_unicode=as_unicode)
+
+ data = b"""
+ {k=1;a=1;b=2};
+ {k=1;a=2;b=3};
+ {k=1;a=3;b=4};
+ <key_switch=True>#;
+ {k=2;a=-1;b=-1};
+ {k=2;a=-2;b=-2};
+ """
+
+ iter = key_switched_list_fragments(
+ stream=InputStream.from_string(data),
+ Reader=reader,
+ )
+
+ records = []
+
+ for group in iter:
+ records.append(
+ list(itertools.islice(group, 2))
+ )
+
+ assert records == [
+ [
+ _({b'k': 1, b'a': 1, b'b': 2}),
+ _({b'k': 1, b'a': 2, b'b': 3}),
+ ],
+ [
+ _({b'k': 2, b'a': -1, b'b': -1}),
+ _({b'k': 2, b'a': -2, b'b': -2}),
+ ]
+ ]
+
+
+@pytest.mark.parametrize('reader', [Reader, UnicodeReader])
+def test_key_switch_empty(reader):
+ assert list(
+ key_switched_list_fragments(
+ stream=InputStream.from_string(""),
+ Reader=reader,
+ )
+ ) == []
diff --git a/library/python/cyson/ut/test_input_stream.py b/library/python/cyson/ut/test_input_stream.py
new file mode 100644
index 0000000000..ae7c0e8f1e
--- /dev/null
+++ b/library/python/cyson/ut/test_input_stream.py
@@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import print_function, absolute_import, division
+
+import atexit
+import io
+import os
+import tempfile
+
+import pytest
+import six
+
+from cyson import Reader, InputStream, dumps
+
+
+def prepare_file(string):
+ filepath = tempfile.mktemp()
+
+ with open(filepath, 'wb') as sink:
+ sink.write(string)
+
+ atexit.register(os.remove, filepath)
+
+ return filepath
+
+
+def prepare_bytesio(string, klass):
+ obj = klass()
+ obj.write(b'?:!;*')
+ obj.write(string)
+ obj.seek(5)
+
+ return obj
+
+
+def slice_string(string):
+ index = 0
+ length = len(string)
+
+ while index < length:
+ yield string[index:index + 2]
+ index += 2
+
+
+# Each case pairs an InputStream constructor name with a callable that turns
+# the serialized bytes into the argument passed to that constructor.
+# NOTE(review): the file object and the raw descriptor opened by the last two
+# cases are not closed anywhere in this module - confirm that is acceptable.
+# <method name>, <input constructor>
+CASES = (
+ ('from_string', lambda x: x),
+ ('from_iter', slice_string),
+ ('from_file', lambda x: prepare_bytesio(x, io.BytesIO)),
+ ('from_file', lambda x: open(prepare_file(x), 'rb')),
+ ('from_fd', lambda x: os.open(prepare_file(x), os.O_RDONLY)),
+)
+
+# The StringIO/cStringIO modules are imported only on Python 2.
+if six.PY2:
+ import StringIO
+ import cStringIO
+
+ CASES += (
+ ('from_file', lambda x: prepare_bytesio(x, StringIO.StringIO)),
+ ('from_file', lambda x: prepare_bytesio(x, cStringIO.StringIO)),
+ )
+
+
+# DATA mixes text and byte strings; ETALON is the same structure with every
+# string given as bytes, which is what the test expects Reader to return.
+DATA = {u'a': [1, u'word', 3], b'b': b'xyz', u'c': None}
+ETALON = {b'a': [1, b'word', 3], b'b': b'xyz', b'c': None}
+
+
+# Module-scoped fixture: DATA serialized once in binary format.
+@pytest.fixture(scope='module')
+def serialized_data():
+ return dumps(DATA, format='binary')
+
+
+# dumps() must return bytes, not text.
+# NOTE(review): the function name misspells "serialized"; left unchanged here
+# so the collected test id stays the same.
+def test_serizlized_data(serialized_data):
+ assert type(serialized_data) is bytes
+
+
+@pytest.mark.parametrize('method_name,make_input', CASES)
+def test_input_streams(method_name, make_input, serialized_data):
+ method = getattr(InputStream, method_name)
+ input_stream = method(make_input(serialized_data))
+
+ assert Reader(input_stream).node() == ETALON
diff --git a/library/python/cyson/ut/test_py_reader_writer.py b/library/python/cyson/ut/test_py_reader_writer.py
new file mode 100644
index 0000000000..0238040f50
--- /dev/null
+++ b/library/python/cyson/ut/test_py_reader_writer.py
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import print_function, absolute_import, division
+
+import pytest
+import six
+
+from cyson import PyWriter, PyReader, dumps, loads, dumps_into
+
+
+if six.PY3:
+ unicode = str
+
+
+def switch_string_type(string):
+ if isinstance(string, unicode):
+ return string.encode('utf8')
+ elif isinstance(string, bytes):
+ return string.decode('utf8')
+
+ raise TypeError('expected bytes or unicode, got {!r}'.format(string))
+
+
+# Values used by the PyWriter/PyReader round-trip tests below; the tests
+# require both the value and its exact type to survive the round trip.
+CASES = [
+ None,
+ # int
+ 0, 1, -1, 2**63, -2**63, 2**64 - 1,
+ # float
+ 0.0, 100.0, -100.0,
+ # long
+ 10**100, 2**300, -7**100,
+ # bytes
+ b'', b'hello', u'Привет'.encode('utf8'),
+ # unicode
+ u'', u'hello', u'Привет',
+ # tuple
+ (), (0,), (1, 'hello'), (17, 'q') * 100,
+ # list
+ [], [0], ['hello', set([1, 2, 3])], [17, 'q'] * 100,
+ # dict
+ {}, {'a': 'b'}, {'a': 17}, {'a': frozenset([1, 2, 3])}, {b'a': 1, u'b': 2},
+ {1: 2, 3: 4, 5: None}, {(1, 2, 3): (1, 4, 9), None: 0},
+ # set
+ set(), {1, 2, 3}, {'hello', 'world'},
+ # frozenset
+ frozenset(), frozenset([1, 2, 3]), frozenset(['hello', 'world']),
+]
+
+
+# dumps -> loads with PyWriter/PyReader must preserve both value and type.
+# The format name is also passed as the other string type and must give
+# identical output.
+@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
+@pytest.mark.parametrize('value', CASES)
+def test_roundtrip(value, format):
+ encoded = dumps(value, format=format, Writer=PyWriter)
+ decoded = loads(encoded, Reader=PyReader)
+ assert encoded == dumps(value, format=switch_string_type(format), Writer=PyWriter)
+ assert type(decoded) is type(value)
+ assert decoded == value
+
+
+# Same round trip as above, but serializing into caller-provided bytearrays
+# via dumps_into and loading directly from a bytearray.
+@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
+@pytest.mark.parametrize('value', CASES)
+def test_roundtrip_bytearray(value, format):
+ encoded1 = bytearray()
+ encoded2 = bytearray()
+ dumps_into(encoded1, value, format=format, Writer=PyWriter)
+ dumps_into(encoded2, value, format=switch_string_type(format), Writer=PyWriter)
+ decoded = loads(encoded1, Reader=PyReader)
+ assert decoded == loads(encoded2, Reader=PyReader)
+ assert type(decoded) is type(value)
+ assert decoded == value
diff --git a/library/python/cyson/ut/test_reader_writer.py b/library/python/cyson/ut/test_reader_writer.py
new file mode 100644
index 0000000000..6428ea0b56
--- /dev/null
+++ b/library/python/cyson/ut/test_reader_writer.py
@@ -0,0 +1,251 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import print_function, absolute_import, division
+
+import io
+import math
+import pytest
+import six
+import sys
+
+from functools import partial
+
+from cyson import (
+ dumps, loads, YsonInt64, YsonUInt64, UInt, Writer, OutputStream,
+ UnicodeReader,
+)
+
+
+# NativeUInt is the type the tests expect for loaded unsigned values:
+# the builtin long on Python 2 and cyson's UInt on Python 3.
+# On Python 3 the names `unicode` and `long` are aliased to str and int so
+# the rest of the module can use them on both versions.
+if six.PY2:
+ NativeUInt = long # noqa: F821
+elif six.PY3:
+ NativeUInt = UInt
+ unicode = str
+ long = int
+else:
+ raise RuntimeError('Unsupported Python version')
+
+
+def canonize(value, as_unicode=False):
+ _canonize = partial(canonize, as_unicode=as_unicode)
+
+ if isinstance(value, (list, tuple)):
+ return [_canonize(_) for _ in value]
+ elif isinstance(value, dict):
+ return {_canonize(k): _canonize(value[k]) for k in value}
+ elif isinstance(value, unicode) and not as_unicode:
+ return value.encode('utf8')
+ elif isinstance(value, bytes) and as_unicode:
+ return value.decode('utf8')
+
+ return value
+
+
+def switch_string_type(string):
+ if isinstance(string, bytes):
+ return string.decode('utf8')
+ elif isinstance(string, unicode):
+ return string.encode('utf8')
+
+ raise TypeError('expected unicode or bytes, got {!r}'.format(string))
+
+
+def coerce(obj, to, via=None):
+ if via is None:
+ via = to
+
+ if isinstance(obj, to):
+ return obj
+
+ return via(obj)
+
+
+# Marker for tests that only apply to Python 2.
+SKIP_PY3 = pytest.mark.skipif(six.PY3, reason='Makes no sense for Python3')
+
+
+# numpy scalar cases are left out on Python 3 + win32.
+# NOTE(review): on Python 2 + win32 numpy is still imported here, while
+# ut/ya.make adds the numpy PEERDIR only when NOT OS_WINDOWS - confirm numpy
+# is available in that configuration.
+if six.PY3 and sys.platform == 'win32':
+ NUMPY_CASES = []
+else:
+ import numpy as np
+
+ NUMPY_CASES = [
+ # numpy int
+ np.int8(2 ** 7 - 1), np.int16(2 ** 15 - 1),
+ np.int32(2 ** 31 - 1), np.int64(2 ** 63 - 1),
+ # numpy uint
+ np.uint8(2 ** 8 - 1), np.uint16(2 ** 16 - 1),
+ np.uint32(2 ** 32 - 1), np.uint64(2 ** 64 - 1),
+ # numpy float
+ np.float16(100.0), np.float32(100.0), np.float64(100.0),
+ ]
+
+
+# Values for the default Writer/Reader round-trip tests; results are compared
+# against canonize(value), so tuples and text are expected back as lists and
+# bytes (or as text with UnicodeReader).
+CASES = [
+ # NoneType
+ None,
+ # boolean
+ True, False,
+ # int
+ 0, 1, -1, int(2 ** 63 - 1), int(-2 ** 63),
+ # float
+ 0.0, 100.0, -100.0, float('inf'), float('-inf'),
+ # bytes
+ b'', b'hello', u'Привет'.encode('utf8'),
+ # unicode
+ u'', u'hello', u'Привет',
+ # list
+ [], [0], [1, 'hello'], [17, 'q'] * 100, [b'bytes'],
+ # tuple
+ (), (0,), (1, 'hello'), (17, 'q') * 100, (b'bytes',),
+ # dict
+ {}, {'a': 'b'}, {'a': 17}, {'a': [1, 2, 3]}, {b'a': 1, u'b': b'a'}
+] + NUMPY_CASES
+
+
+# dumps -> loads must return the canonized value; the format name given as
+# the other string type must produce identical output.
+@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
+@pytest.mark.parametrize('value', CASES)
+def test_roundtrip(value, format):
+ encoded = dumps(value, format)
+ decoded = loads(encoded)
+ assert encoded == dumps(value, switch_string_type(format))
+ assert decoded == canonize(value)
+
+
+# NOTE: roundtrip test doesn't work for NaN (NaN != NaN)
+# so the decoded value is checked with math.isnan instead of equality.
+@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
+def test_nan(format):
+ encoded = dumps(float('nan'), format)
+ decoded = loads(encoded)
+ assert encoded == dumps(float('nan'), switch_string_type(format))
+ assert math.isnan(decoded)
+
+
+# Python 2 only: long values from 0 up to 2**64 - 1 must round-trip.
+@SKIP_PY3
+@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
+@pytest.mark.parametrize(
+ 'value', [long(0), long(1), long(2 ** 63), long(2 ** 64 - 1)]
+)
+def test_long_roundtrip(value, format):
+ encoded = dumps(value, format)
+ decoded = loads(encoded)
+ assert encoded == dumps(value, switch_string_type(format))
+ assert decoded == value
+
+
+# A NativeUInt value must serialize exactly like its YsonUInt64 wrapper, and
+# loading that output must give back exactly the NativeUInt type.
+@pytest.mark.parametrize(
+ 'value', [NativeUInt(0), NativeUInt(111), NativeUInt(2 ** 63), NativeUInt(2 ** 64 - 1)]
+)
+@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
+def test_readwrite_uint64(value, format):
+ dumped_uint64 = dumps(coerce(value, YsonUInt64), format=format)
+ loaded_uint64 = loads(dumped_uint64)
+
+ assert type(value) is NativeUInt
+ assert type(loaded_uint64) is NativeUInt
+ assert dumps(value, format=format) == dumped_uint64
+
+
+# A plain int must serialize exactly like its YsonInt64 wrapper, and loading
+# that output must give back exactly the int type.
+@pytest.mark.parametrize('value', [int(-2 ** 63), -111, 0, 111, int(2 ** 63 - 1)])
+@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
+def test_readwrite_int64(value, format):
+ dumped_int64 = dumps(YsonInt64(value), format=format)
+ loaded_int64 = loads(dumped_int64)
+
+ assert type(value) is int
+ assert type(loaded_int64) is int
+ assert dumps(value, format=format) == dumped_int64
+
+
+# Python 2 only: a long outside [0, 2**64 - 1] must raise OverflowError.
+@SKIP_PY3
+def test_long_overflow():
+ with pytest.raises(OverflowError):
+ dumps(long(-1))
+
+ with pytest.raises(OverflowError):
+ dumps(long(2**64))
+
+
+# Values just outside the signed 64-bit range must raise OverflowError.
+# The first block passes if either constructing YsonInt64 or dumping it
+# raises; the plain-int check is performed on Python 3 only.
+@pytest.mark.parametrize('value', [2 ** 63, -2 ** 63 - 1])
+def test_int64_overflow(value):
+ with pytest.raises(OverflowError):
+ int64_value = YsonInt64(value)
+ dumps(int64_value)
+
+ if six.PY3:
+ with pytest.raises(OverflowError):
+ dumps(value)
+
+
+# Values above 2**64 - 1 must raise OverflowError, either when constructing
+# YsonUInt64 or when dumping it.
+@pytest.mark.parametrize('value', [2 ** 64, 2 ** 100])
+def test_uint64_overflow(value):
+ with pytest.raises(OverflowError):
+ uint64_value = YsonUInt64(value)
+ dumps(uint64_value)
+
+
+# Writer.list() must accept an object that only provides __getitem__ and
+# __len__ and produce the same bytes as dumps() of the underlying list.
+@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
+def test_force_write_sequence(format):
+ class Sequence(object):
+ def __init__(self, seq):
+ self._seq = seq
+
+ def __getitem__(self, index):
+ return self._seq[index]
+
+ def __len__(self):
+ return len(self._seq)
+
+ sequence = [1, 1.1, None, b'xyz']
+
+ sink = io.BytesIO()
+ writer = Writer(OutputStream.from_file(sink), format=format)
+
+ writer.begin_stream()
+ writer.list(Sequence(sequence))
+ writer.end_stream()
+
+ assert sink.getvalue() == dumps(sequence, format)
+
+
+# Writer.map() must accept an object that only provides __getitem__ and
+# keys() and produce the same bytes as dumps() of the underlying dict.
+@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
+def test_force_write_mapping(format):
+ class Mapping(object):
+ def __init__(self, mapping):
+ self._mapping = mapping
+
+ def __getitem__(self, key):
+ return self._mapping[key]
+
+ def keys(self):
+ return self._mapping.keys()
+
+ mapping = {b'a': 1, b'b': 1.1, b'c': None, b'd': b'some'}
+
+ sink = io.BytesIO()
+ writer = Writer(OutputStream.from_file(sink), format=format)
+
+ writer.begin_stream()
+ writer.map(Mapping(mapping))
+ writer.end_stream()
+
+ assert sink.getvalue() == dumps(mapping, format)
+
+
+# Loading with UnicodeReader must return every string as text.
+@pytest.mark.parametrize('format', ['binary', 'text', 'pretty'])
+@pytest.mark.parametrize('value', CASES)
+def test_unicode_reader(value, format):
+ expected = canonize(value, as_unicode=True)
+ got = loads(dumps(value, format), UnicodeReader)
+ assert expected == got
+
+
+# Bytes that are not valid UTF-8 must raise UnicodeDecodeError.
+def test_unicode_reader_raises_unicode_decode_error():
+ not_decodable = b'\x80\x81'
+ with pytest.raises(UnicodeDecodeError):
+ loads(dumps(not_decodable, format='binary'), UnicodeReader)
+
+
+# The attributes attached to the "d" value are absent from the expected
+# result; only the decoded keys and values are compared.
+def test_unicode_reader_decodes_object_with_attributes():
+ data = b'{"a" = "b"; "c" = <"foo" = "bar">"d"}'
+ expected = {u"a": u"b", u"c": u"d"}
+ assert loads(data, UnicodeReader) == expected
diff --git a/library/python/cyson/ut/test_unsigned_long.py b/library/python/cyson/ut/test_unsigned_long.py
new file mode 100644
index 0000000000..3cd4ffe440
--- /dev/null
+++ b/library/python/cyson/ut/test_unsigned_long.py
@@ -0,0 +1,222 @@
+#!/usr/bin/env python
+
+from __future__ import division
+
+import pytest
+import six
+
+from cyson import UInt
+
+
+if six.PY3:
+ long = int
+
+
+def equals_with_type(data, etalon):
+ return type(data) is type(etalon) and data == etalon
+
+
+def equals_as_uint(data, etalon):
+ return type(data) is UInt and data == etalon
+
+
+N = long(12)
+UN = UInt(N)
+
+
+# UInt can be built from ints, longs, floats (giving 23 for 23.57), numeric
+# strings and no argument (zero); a negative value raises OverflowError.
+def test_uint64_initialization():
+ assert UInt(2**63 - 1) == 2**63 - 1
+ assert UInt() == UInt(0) == 0
+ assert UInt(long(78)) == 78
+ assert UInt(23.57) == 23
+ assert UInt('111') == 111
+
+ with pytest.raises(OverflowError):
+ UInt(-10)
+
+
+# Addition keeps the UInt type for non-negative integer results; with a float
+# operand or a negative result the type must match the plain-long computation.
+def test_add():
+ assert equals_as_uint(UN + 1, N + 1)
+ assert equals_as_uint(UN + long(1), N + 1)
+ assert equals_as_uint(UN + UInt(1), N + 1)
+ assert equals_as_uint(1 + UN, 1 + N)
+ assert equals_as_uint(long(1) + UN, long(1) + N)
+ assert equals_as_uint(UInt(1) + UN, 1 + N)
+ assert equals_with_type(UN + 1.1, N + 1.1)
+ assert equals_with_type(1.1 + UN, 1.1 + N)
+ assert equals_with_type(UN + int(-N - 1), N + int(-N - 1))
+ assert equals_with_type(UN + long(-N - 1), N + long(-N - 1))
+ assert equals_with_type(int(-N - 1) + UN, int(-N - 1) + N)
+ assert equals_with_type(long(-N - 1) + UN, long(-N - 1) + N)
+
+
+def test_sub():
+ assert equals_as_uint(UN - 1, N - 1)
+ assert equals_as_uint(UN - long(1), N - long(1))
+ assert equals_as_uint(UN - UInt(1), N - 1)
+ assert equals_as_uint(13 - UN, 13 - UN)
+ assert equals_as_uint(UInt(13) - UN, long(13) - N)
+ assert equals_as_uint(long(13) - UN, long(13) - UN)
+ assert equals_with_type(UN - 0.1, N - 0.1)
+ assert equals_with_type(13.1 - UN, 13.1 - N)
+ assert equals_with_type(1 - UN, long(1) - N)
+ assert equals_with_type(long(1) - UN, long(1) - N)
+ assert equals_with_type(UInt(1) - UN, long(1) - N)
+ assert equals_with_type(UN - int(UN + 1), N - int(UN + 1))
+ assert equals_with_type(UN - long(UN + 1), N - long(UN + 1))
+ assert equals_with_type(UN - UInt(UN + 1), N - long(UN + 1))
+
+
+def test_mul():
+ assert equals_as_uint(UN * 2, N * 2)
+ assert equals_as_uint(UN * long(2), N * long(2))
+ assert equals_as_uint(UN * UInt(2), N * long(2))
+ assert equals_as_uint(2 * UN, 2 * N)
+ assert equals_as_uint(long(2) * UN, long(2) * UN)
+ assert equals_as_uint(UInt(2) * UN, long(2) * UN)
+ assert equals_with_type(-3 * UN, -3 * N)
+ assert equals_with_type(long(-3) * UN, long(-3) * N)
+ assert equals_with_type(UN * -3, N * -3)
+ assert equals_with_type(UN * long(-3), N * long(-3))
+ assert equals_with_type(UN * 1.1, N * 1.1)
+ assert equals_with_type(1.1 * UN, 1.1 * N)
+
+
+# True division (this module enables `from __future__ import division`) must
+# give the same value and type as the plain-long computation.
+def test_truediv():
+ assert equals_with_type(UN / 1, N / long(1))
+ assert equals_with_type(UN / UInt(1), N / long(1))
+ assert equals_with_type(1 / UN, long(1) / N)
+ assert equals_with_type(UInt(1) / UN, long(1) / N)
+ assert equals_with_type(UN / N, N / long(N))
+ assert equals_with_type(UN / UInt(N), N / long(N))
+ assert equals_with_type(UN / -1, N / long(-1))
+ assert equals_with_type(-1 / UN, long(-1) / N)
+ assert equals_with_type(UN / 1.1, N / 1.1)
+ assert equals_with_type(1.1 / UN, 1.1 / N)
+
+
+# Floor division keeps the UInt type for non-negative integer operands; with
+# a negative or float operand the type must match the plain-long computation.
+def test_floordiv():
+ # floor division (__floordiv__)
+ assert equals_as_uint(UN // 1, N // 1)
+ assert equals_as_uint(UN // long(1), N // long(1))
+ assert equals_as_uint(UN // UInt(1), N // long(1))
+ assert equals_as_uint(1 // UN, 1 // N)
+ assert equals_as_uint(long(1) // UN, long(1) // N)
+ assert equals_as_uint(UInt(1) // UN, long(1) // N)
+ assert equals_as_uint(UN // N, N // N)
+ assert equals_as_uint(UN // UN, N // N)
+ assert equals_with_type(UN // -1, N // long(-1))
+ assert equals_with_type(UN // long(-1), N // long(-1))
+ assert equals_with_type(-1 // UN, -long(1) // N)
+ assert equals_with_type(long(-1) // UN, long(-1) // N)
+ assert equals_with_type(UN // 1.1, N // 1.1)
+ assert equals_with_type(1.1 // UN, 1.1 // N)
+
+
+# Modulo with a UInt divisor stays UInt even for a negative left operand;
+# with a negative divisor the type must match the plain-long computation.
+def test_mod():
+ assert equals_as_uint(UN % 7, N % 7)
+ assert equals_as_uint(UN % long(7), N % long(7))
+ assert equals_as_uint(UN % UInt(7), N % long(7))
+ assert equals_as_uint(23 % UN, 23 % N)
+ assert equals_as_uint(long(23) % UN, long(23) % N)
+ assert equals_as_uint(UInt(23) % UN, long(23) % N)
+ assert equals_as_uint(-23 % UN, -23 % N)
+ assert equals_as_uint(long(-23) % UN, long(-23) % N)
+ assert equals_with_type(UN % -11, N % long(-11))
+ assert equals_with_type(UN % long(-11), N % long(-11))
+
+
+# Power keeps the UInt type for non-negative integer base and exponent; with
+# a negative or float operand the type must match the plain-long computation.
+def test_pow():
+ assert equals_as_uint(UN ** 2, N ** 2)
+ assert equals_as_uint(UN ** long(2), N ** long(2))
+ assert equals_as_uint(UN ** UInt(2), N ** long(2))
+ assert equals_as_uint(2 ** UN, 2 ** N)
+ assert equals_as_uint(long(2) ** UN, long(2) ** N)
+ assert equals_as_uint(UInt(2) ** UN, long(2) ** N)
+ assert equals_with_type(UN ** -1, N ** long(-1))
+ assert equals_with_type(UN ** long(-1), N ** -long(1))
+ assert equals_with_type(UN ** 1.1, N ** 1.1)
+ assert equals_with_type(UN ** -1.1, N ** -1.1)
+ assert equals_with_type(1.1 ** UN, 1.1 ** N)
+ assert equals_with_type(UN ** 0.5, N ** 0.5)
+ assert equals_with_type(0.5 ** UN, 0.5 ** N)
+
+
+# Negation returns the plain integer type, including for UInt(0).
+def test_neg():
+ assert equals_with_type(-UN, -N)
+ assert equals_with_type(-UInt(0), long(0))
+
+
+# Unary plus keeps the UInt type.
+def test_pos():
+ assert equals_as_uint(+UN, N)
+ assert equals_as_uint(+UInt(0), 0)
+
+
+# abs() keeps the UInt type and returns the very same object.
+def test_abs():
+ assert equals_as_uint(abs(UN), N)
+ assert abs(UN) is UN
+
+
+# Bitwise inversion must give the same value and type as for a plain long.
+def test_invert():
+ assert equals_with_type(~UN, ~N)
+ assert equals_with_type(~UInt(0), ~long(0))
+
+
+# Left shift keeps the UInt type for non-negative operands; a negative left
+# operand gives the plain integer type. Float operands raise TypeError and
+# a negative shift count raises ValueError.
+def test_lshift():
+ assert equals_as_uint(1 << UN, 1 << N)
+ assert equals_as_uint(long(1) << UN, long(1) << N)
+ assert equals_as_uint(UInt(1) << UN, long(1) << N)
+ assert equals_as_uint(UN << 2, N << 2)
+ assert equals_as_uint(UN << long(2), N << 2)
+ assert equals_as_uint(UN << UInt(2), N << 2)
+ assert equals_with_type(-1 << UN, -1 << N)
+ assert equals_with_type(long(-1) << UN, -long(1) << N)
+
+ with pytest.raises(TypeError):
+ UN << 1.1
+ with pytest.raises(TypeError):
+ 1.1 << UN
+ with pytest.raises(ValueError):
+ UN << -1
+
+
+# Right shift keeps the UInt type for non-negative operands; a negative left
+# operand gives the plain integer type. Float operands raise TypeError and
+# a negative shift count raises ValueError.
+def test_rshift():
+ assert equals_as_uint(10000 >> UN, 10000 >> N)
+ assert equals_as_uint(long(10000) >> UN, long(10000) >> N)
+ assert equals_as_uint(UInt(10000) >> UN, long(10000) >> N)
+ assert equals_as_uint(UN >> 2, N >> 2)
+ assert equals_as_uint(UN >> long(2), N >> long(2))
+ assert equals_as_uint(UN >> UInt(2), N >> long(2))
+ assert equals_with_type(-10000 >> UN, -10000 >> N)
+ assert equals_with_type(long(-10000) >> UN, long(-10000) >> N)
+
+ with pytest.raises(TypeError):
+ UN >> 1.1
+ with pytest.raises(TypeError):
+ 1.1 >> UN
+ with pytest.raises(ValueError):
+ UN >> -1
+
+
+# Bitwise AND with a non-negative integer keeps UInt; a float raises TypeError.
+def test_and():
+ assert equals_as_uint(UN & 15, N & 15)
+ assert equals_as_uint(UN & long(15), N & long(15))
+
+ with pytest.raises(TypeError):
+ UN & 1.1
+
+
+# Bitwise OR with a non-negative integer keeps UInt; a float raises TypeError.
+def test_or():
+ assert equals_as_uint(UN | 15, N | 15)
+ assert equals_as_uint(UN | long(15), N | long(15))
+
+ with pytest.raises(TypeError):
+ UN | 1.1
+
+
+# Bitwise XOR with a non-negative integer keeps UInt; a float raises TypeError.
+def test_xor():
+ assert equals_as_uint(UN ^ 9, N ^ 9)
+ assert equals_as_uint(UN ^ long(9), N ^ long(9))
+
+ with pytest.raises(TypeError):
+ UN ^ 1.1
diff --git a/library/python/cyson/ut/ya.make b/library/python/cyson/ut/ya.make
new file mode 100644
index 0000000000..1af753735f
--- /dev/null
+++ b/library/python/cyson/ut/ya.make
@@ -0,0 +1,21 @@
+# Test target for library/python/cyson, declared with PY23_TEST.
+PY23_TEST()
+
+PEERDIR(
+ library/python/cyson
+)
+
+# numpy is added as a dependency on every platform except Windows.
+IF(NOT OS_WINDOWS)
+ PEERDIR(
+ contrib/python/numpy
+ )
+ENDIF()
+
+TEST_SRCS(
+ test_control_attributes.py
+ test_input_stream.py
+ test_py_reader_writer.py
+ test_reader_writer.py
+ test_unsigned_long.py
+)
+
+END()
diff --git a/library/python/cyson/ya.make b/library/python/cyson/ya.make
new file mode 100644
index 0000000000..3a66455904
--- /dev/null
+++ b/library/python/cyson/ya.make
@@ -0,0 +1,28 @@
+# The cyson Python library: C++ helpers plus the Cython module and package
+# __init__, built on top of library/c/cyson.
+PY23_LIBRARY()
+
+NO_WSHADOW()
+
+PEERDIR(
+ library/c/cyson
+)
+
+SRCS(
+ cyson/helpers.cpp
+ cyson/unsigned_long.cpp
+)
+
+PY_SRCS(
+ TOP_LEVEL
+ cyson/_cyson.pyx
+ cyson/__init__.py
+)
+
+END()
+
+# Also descend into the standalone extension-module target.
+RECURSE(
+ pymodule
+)
+
+RECURSE_FOR_TESTS(
+ ut
+)