diff options
author | max42 <max42@yandex-team.com> | 2023-06-30 03:37:03 +0300 |
---|---|---|
committer | max42 <max42@yandex-team.com> | 2023-06-30 03:37:03 +0300 |
commit | fac2bd72b4b31ec3238292caf8fb2a8aaa6d6c4a (patch) | |
tree | b8cbc1deb00309c7f1a7ab6df520a76cf0b5c6d7 /library/cpp/skiff | |
parent | 7bf166b1a7ed0af927f230022b245af618e998c1 (diff) | |
download | ydb-fac2bd72b4b31ec3238292caf8fb2a8aaa6d6c4a.tar.gz |
YT-19324: move YT provider to ydb/library/yql
This commit was generated by the following script: https://paste.yandex-team.ru/6f92e4b8-efc5-4d34-948b-15ee2accd7e7/text.
This commit has zero effect on all projects that depend on YQL.
The summary of changes:
- `yql/providers/yt -> ydb/library/yql/providers/yt` - the whole implementation of the YT provider is moved into the YDB code base for further export as part of the YT YQL plugin shared library;
- `yql/providers/stat/{expr_nodes,uploader} -> ydb/library/yql/providers/stat/{expr_nodes,uploader}` - a small interface without an implementation, plus the description of the stat expr nodes;
- `yql/core/extract_predicate/ut -> ydb/library/yql/core/extract_predicate/ut`;
- `yql/core/{ut,ut_common} -> ydb/library/yql/core/{ut,ut_common}`;
- `yql/core` is gone;
- `yql/library/url_preprocessing -> ydb/library/yql/core/url_preprocessing`.
**NB**: all new targets inside `ydb/` are under an `IF (NOT CMAKE_EXPORT)` clause, which excludes them from open-source CMake generation and from the ya make build. They will be enabled in subsequent commits.
Diffstat (limited to 'library/cpp/skiff')
-rw-r--r-- | library/cpp/skiff/public.h | 63 | ||||
-rw-r--r-- | library/cpp/skiff/skiff-inl.h | 39 | ||||
-rw-r--r-- | library/cpp/skiff/skiff.cpp | 591 | ||||
-rw-r--r-- | library/cpp/skiff/skiff.h | 259 | ||||
-rw-r--r-- | library/cpp/skiff/skiff_schema-inl.h | 61 | ||||
-rw-r--r-- | library/cpp/skiff/skiff_schema.cpp | 164 | ||||
-rw-r--r-- | library/cpp/skiff/skiff_schema.h | 121 | ||||
-rw-r--r-- | library/cpp/skiff/skiff_validator.cpp | 396 | ||||
-rw-r--r-- | library/cpp/skiff/skiff_validator.h | 39 | ||||
-rw-r--r-- | library/cpp/skiff/unittests/skiff_schema_ut.cpp | 148 | ||||
-rw-r--r-- | library/cpp/skiff/unittests/skiff_ut.cpp | 627 | ||||
-rw-r--r-- | library/cpp/skiff/unittests/ya.make | 12 | ||||
-rw-r--r-- | library/cpp/skiff/ya.make | 16 | ||||
-rw-r--r-- | library/cpp/skiff/zerocopy_output_writer-inl.h | 51 | ||||
-rw-r--r-- | library/cpp/skiff/zerocopy_output_writer.cpp | 38 | ||||
-rw-r--r-- | library/cpp/skiff/zerocopy_output_writer.h | 41 |
16 files changed, 2666 insertions, 0 deletions
diff --git a/library/cpp/skiff/public.h b/library/cpp/skiff/public.h new file mode 100644 index 0000000000..d67c6f26ee --- /dev/null +++ b/library/cpp/skiff/public.h @@ -0,0 +1,63 @@ +#pragma once + +#include <vector> +#include <memory> + +namespace NSkiff { + +//////////////////////////////////////////////////////////////////////////////// + +enum class EWireType +{ + Nothing /* "nothing" */, + Int8 /* "int8" */, + Int16 /* "int16" */, + Int32 /* "int32" */, + Int64 /* "int64" */, + Int128 /* "int128" */, + Uint8 /* "uint8" */, + Uint16 /* "uint16" */, + Uint32 /* "uint32" */, + Uint64 /* "uint64" */, + Uint128 /* "uint128" */, + Double /* "double" */, + Boolean /* "boolean" */, + String32 /* "string32" */, + Yson32 /* "yson32" */, + + Tuple /* "tuple" */, + Variant8 /* "variant8" */, + Variant16 /* "variant16" */, + RepeatedVariant8 /* "repeated_variant8" */, + RepeatedVariant16 /* "repeated_variant16" */, +}; + +//////////////////////////////////////////////////////////////////////////////// + +class TSkiffSchema; +using TSkiffSchemaPtr = std::shared_ptr<TSkiffSchema>; + +using TSkiffSchemaList = std::vector<TSkiffSchemaPtr>; + +class TSimpleTypeSchema; +using TSimpleTypeSchemaPtr = std::shared_ptr<TSimpleTypeSchema>; + +class TSkiffValidator; + +class TUncheckedSkiffParser; +class TCheckedSkiffParser; + +class TUncheckedSkiffWriter; +class TCheckedSkiffWriter; + +#ifdef DEBUG +using TCheckedInDebugSkiffParser = TCheckedSkiffParser; +using TCheckedInDebugSkiffWriter = TCheckedSkiffWriter; +#else +using TCheckedInDebugSkiffParser = TUncheckedSkiffParser; +using TCheckedInDebugSkiffWriter = TUncheckedSkiffWriter; +#endif + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NSkiff diff --git a/library/cpp/skiff/skiff-inl.h b/library/cpp/skiff/skiff-inl.h new file mode 100644 index 0000000000..a3f68a9374 --- /dev/null +++ b/library/cpp/skiff/skiff-inl.h @@ -0,0 +1,39 @@ +#pragma once + +#ifndef SKIFF_H +#error "Direct 
inclusion of this file is not allowed, include skiff.h" +// For the sake of sane code completion. +#include "skiff.h" +#endif +#undef SKIFF_H + +namespace NSkiff { + +//////////////////////////////////////////////////////////////////////////////// + +template <EWireType wireType> +constexpr auto TUnderlyingIntegerType<wireType>::F() { + if constexpr (wireType == EWireType::Int8) { + return i8{}; + } else if constexpr (wireType == EWireType::Int16) { + return i16{}; + } else if constexpr (wireType == EWireType::Int32) { + return i32{}; + } else if constexpr (wireType == EWireType::Int64) { + return i64{}; + } else if constexpr (wireType == EWireType::Uint8) { + return ui8{}; + } else if constexpr (wireType == EWireType::Uint16) { + return ui16{}; + } else if constexpr (wireType == EWireType::Uint32) { + return ui32{}; + } else if constexpr (wireType == EWireType::Uint64) { + return ui64{}; + } else { + static_assert(wireType == EWireType::Int8, "expected integer wire type"); + } +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NSkiff diff --git a/library/cpp/skiff/skiff.cpp b/library/cpp/skiff/skiff.cpp new file mode 100644 index 0000000000..cbdbdfe364 --- /dev/null +++ b/library/cpp/skiff/skiff.cpp @@ -0,0 +1,591 @@ +#include "skiff.h" + +#include "skiff_validator.h" + +#include <util/stream/buffered.h> +#include <util/system/byteorder.h> +#include <util/system/unaligned_mem.h> + +namespace NSkiff { + +//////////////////////////////////////////////////////////////////////////////// + +bool operator==(TInt128 lhs, TInt128 rhs) +{ + return lhs.Low == rhs.Low && lhs.High == rhs.High; +} + +bool operator!=(TInt128 lhs, TInt128 rhs) +{ + return !(lhs == rhs); +} + +bool operator==(TUint128 lhs, TUint128 rhs) +{ + return lhs.Low == rhs.Low && lhs.High == rhs.High; +} + +bool operator!=(TUint128 lhs, TUint128 rhs) +{ + return !(lhs == rhs); +} + 
+//////////////////////////////////////////////////////////////////////////////// + +TUncheckedSkiffParser::TUncheckedSkiffParser(IZeroCopyInput* underlying) + : Underlying_(underlying) + , Buffer_(512 * 1024) +{ } + +TUncheckedSkiffParser::TUncheckedSkiffParser(const std::shared_ptr<TSkiffSchema>& /*schema*/, IZeroCopyInput* underlying) + : TUncheckedSkiffParser(underlying) +{ } + +i8 TUncheckedSkiffParser::ParseInt8() +{ + return ParseSimple<i8>(); +} + +i16 TUncheckedSkiffParser::ParseInt16() +{ + return ParseSimple<i16>(); +} + +i32 TUncheckedSkiffParser::ParseInt32() +{ + return ParseSimple<i32>(); +} + +i64 TUncheckedSkiffParser::ParseInt64() +{ + return ParseSimple<i64>(); +} + +ui8 TUncheckedSkiffParser::ParseUint8() +{ + return ParseSimple<ui8>(); +} + +ui16 TUncheckedSkiffParser::ParseUint16() +{ + return ParseSimple<ui16>(); +} + +ui32 TUncheckedSkiffParser::ParseUint32() +{ + return ParseSimple<ui32>(); +} + +ui64 TUncheckedSkiffParser::ParseUint64() +{ + return ParseSimple<ui64>(); +} + +TInt128 TUncheckedSkiffParser::ParseInt128() +{ + auto low = ParseSimple<ui64>(); + auto high = ParseSimple<i64>(); + return {low, high}; +} + +TUint128 TUncheckedSkiffParser::ParseUint128() +{ + auto low = ParseSimple<ui64>(); + auto high = ParseSimple<ui64>(); + return {low, high}; +} + +double TUncheckedSkiffParser::ParseDouble() +{ + return ParseSimple<double>(); +} + +bool TUncheckedSkiffParser::ParseBoolean() +{ + ui8 result = ParseSimple<ui8>(); + if (result > 1) { + ythrow TSkiffException() << "Invalid boolean value \"" << result << "\""; + } + return result; +} + +TStringBuf TUncheckedSkiffParser::ParseString32() +{ + ui32 len = ParseSimple<ui32>(); + const void* data = GetData(len); + return TStringBuf(static_cast<const char*>(data), len); +} + +TStringBuf TUncheckedSkiffParser::ParseYson32() +{ + return ParseString32(); +} + +ui8 TUncheckedSkiffParser::ParseVariant8Tag() +{ + return ParseSimple<ui8>(); +} + +ui16 TUncheckedSkiffParser::ParseVariant16Tag() +{ 
+ return ParseSimple<ui16>(); +} + +template <typename T> +T TUncheckedSkiffParser::ParseSimple() +{ + return ReadUnaligned<T>(GetData(sizeof(T))); +} + +const void* TUncheckedSkiffParser::GetData(size_t size) +{ + if (RemainingBytes() >= size) { + const void* result = Position_; + Advance(size); + return result; + } + + return GetDataViaBuffer(size); +} + +const void* TUncheckedSkiffParser::GetDataViaBuffer(size_t size) +{ + Buffer_.Clear(); + Buffer_.Reserve(size); + while (Buffer_.Size() < size) { + size_t toCopy = Min(size - Buffer_.Size(), RemainingBytes()); + Buffer_.Append(Position_, toCopy); + Advance(toCopy); + + if (RemainingBytes() == 0) { + RefillBuffer(); + if (Exhausted_ && Buffer_.Size() < size) { + ythrow TSkiffException() << "Premature end of stream while parsing Skiff"; + } + } + } + return Buffer_.Data(); +} + +size_t TUncheckedSkiffParser::RemainingBytes() const +{ + Y_ASSERT(End_ >= Position_); + return End_ - Position_; +} + +void TUncheckedSkiffParser::Advance(size_t size) +{ + Y_ASSERT(size <= RemainingBytes()); + Position_ += size; + ReadBytesCount_ += size; +} + +void TUncheckedSkiffParser::RefillBuffer() +{ + size_t bufferSize = Underlying_->Next(&Position_); + End_ = Position_ + bufferSize; + if (bufferSize == 0) { + Exhausted_ = true; + } +} + +bool TUncheckedSkiffParser::HasMoreData() +{ + if (RemainingBytes() == 0 && !Exhausted_) { + RefillBuffer(); + } + return !(RemainingBytes() == 0 && Exhausted_); +} + +void TUncheckedSkiffParser::ValidateFinished() +{ } + +ui64 TUncheckedSkiffParser::GetReadBytesCount() const +{ + return ReadBytesCount_; +} + +//////////////////////////////////////////////////////////////////////////////// + +TCheckedSkiffParser::TCheckedSkiffParser(const std::shared_ptr<TSkiffSchema>& schema, IZeroCopyInput* stream) + : Parser_(stream) + , Validator_(std::make_unique<TSkiffValidator>(schema)) +{ } + +TCheckedSkiffParser::~TCheckedSkiffParser() = default; + +i8 TCheckedSkiffParser::ParseInt8() +{ + 
Validator_->OnSimpleType(EWireType::Int8); + return Parser_.ParseInt8(); +} + +i16 TCheckedSkiffParser::ParseInt16() +{ + Validator_->OnSimpleType(EWireType::Int16); + return Parser_.ParseInt16(); +} + +i32 TCheckedSkiffParser::ParseInt32() +{ + Validator_->OnSimpleType(EWireType::Int32); + return Parser_.ParseInt32(); +} + +i64 TCheckedSkiffParser::ParseInt64() +{ + Validator_->OnSimpleType(EWireType::Int64); + return Parser_.ParseInt64(); +} + +ui8 TCheckedSkiffParser::ParseUint8() +{ + Validator_->OnSimpleType(EWireType::Uint8); + return Parser_.ParseUint8(); +} + +ui16 TCheckedSkiffParser::ParseUint16() +{ + Validator_->OnSimpleType(EWireType::Uint16); + return Parser_.ParseUint16(); +} + +ui32 TCheckedSkiffParser::ParseUint32() +{ + Validator_->OnSimpleType(EWireType::Uint32); + return Parser_.ParseUint32(); +} + +ui64 TCheckedSkiffParser::ParseUint64() +{ + Validator_->OnSimpleType(EWireType::Uint64); + return Parser_.ParseUint64(); +} + +TInt128 TCheckedSkiffParser::ParseInt128() +{ + Validator_->OnSimpleType(EWireType::Int128); + return Parser_.ParseInt128(); +} + +TUint128 TCheckedSkiffParser::ParseUint128() +{ + Validator_->OnSimpleType(EWireType::Uint128); + return Parser_.ParseUint128(); +} + +double TCheckedSkiffParser::ParseDouble() +{ + Validator_->OnSimpleType(EWireType::Double); + return Parser_.ParseDouble(); +} + +bool TCheckedSkiffParser::ParseBoolean() +{ + Validator_->OnSimpleType(EWireType::Boolean); + return Parser_.ParseBoolean(); +} + +TStringBuf TCheckedSkiffParser::ParseString32() +{ + Validator_->OnSimpleType(EWireType::String32); + return Parser_.ParseString32(); +} + +TStringBuf TCheckedSkiffParser::ParseYson32() +{ + Validator_->OnSimpleType(EWireType::Yson32); + return Parser_.ParseYson32(); +} + +ui8 TCheckedSkiffParser::ParseVariant8Tag() +{ + Validator_->BeforeVariant8Tag(); + auto result = Parser_.ParseVariant8Tag(); + Validator_->OnVariant8Tag(result); + return result; +} + +ui16 TCheckedSkiffParser::ParseVariant16Tag() +{ + 
Validator_->BeforeVariant16Tag(); + auto result = Parser_.ParseVariant16Tag(); + Validator_->OnVariant16Tag(result); + return result; +} + +bool TCheckedSkiffParser::HasMoreData() +{ + return Parser_.HasMoreData(); +} + +void TCheckedSkiffParser::ValidateFinished() +{ + Validator_->ValidateFinished(); + Parser_.ValidateFinished(); +} + +ui64 TCheckedSkiffParser::GetReadBytesCount() const +{ + return Parser_.GetReadBytesCount(); +} + +//////////////////////////////////////////////////////////////////////////////// + +TUncheckedSkiffWriter::TUncheckedSkiffWriter(IZeroCopyOutput* underlying) + : Underlying_(underlying) +{ } + +TUncheckedSkiffWriter::TUncheckedSkiffWriter(IOutputStream* underlying) + : BufferedOutput_(MakeHolder<TBufferedOutput>(underlying)) + , Underlying_(BufferedOutput_.Get()) +{ } + +TUncheckedSkiffWriter::TUncheckedSkiffWriter(const std::shared_ptr<TSkiffSchema>& /*schema*/, IZeroCopyOutput* underlying) + : TUncheckedSkiffWriter(underlying) +{ } + +TUncheckedSkiffWriter::TUncheckedSkiffWriter(const std::shared_ptr<TSkiffSchema>& /*schema*/, IOutputStream* underlying) + : TUncheckedSkiffWriter(underlying) +{ } + +TUncheckedSkiffWriter::~TUncheckedSkiffWriter() +{ + try { + Flush(); + } catch (...) 
{ + } +} + +void TUncheckedSkiffWriter::WriteInt8(i8 value) +{ + WriteSimple<i8>(value); +} + +void TUncheckedSkiffWriter::WriteInt16(i16 value) +{ + WriteSimple<i16>(value); +} + +void TUncheckedSkiffWriter::WriteInt32(i32 value) +{ + WriteSimple<i32>(value); +} + +void TUncheckedSkiffWriter::WriteInt64(i64 value) +{ + WriteSimple<i64>(value); +} + +void TUncheckedSkiffWriter::WriteInt128(TInt128 value) +{ + WriteSimple<ui64>(value.Low); + WriteSimple<i64>(value.High); +} + +void TUncheckedSkiffWriter::WriteUint128(TUint128 value) +{ + WriteSimple<ui64>(value.Low); + WriteSimple<ui64>(value.High); +} + +void TUncheckedSkiffWriter::WriteUint8(ui8 value) +{ + WriteSimple<ui8>(value); +} + +void TUncheckedSkiffWriter::WriteUint16(ui16 value) +{ + WriteSimple<ui16>(value); +} + +void TUncheckedSkiffWriter::WriteUint32(ui32 value) +{ + WriteSimple<ui32>(value); +} + +void TUncheckedSkiffWriter::WriteUint64(ui64 value) +{ + WriteSimple<ui64>(value); +} + +void TUncheckedSkiffWriter::WriteDouble(double value) +{ + return WriteSimple<double>(value); +} + +void TUncheckedSkiffWriter::WriteBoolean(bool value) +{ + return WriteSimple<ui8>(value ? 
1 : 0); +} + +void TUncheckedSkiffWriter::WriteString32(TStringBuf value) +{ + WriteSimple<ui32>(value.size()); + Underlying_.Write(value.data(), value.size()); +} + +void TUncheckedSkiffWriter::WriteYson32(TStringBuf value) +{ + WriteSimple<ui32>(value.size()); + Underlying_.Write(value.data(), value.size()); +} + +void TUncheckedSkiffWriter::WriteVariant8Tag(ui8 tag) +{ + WriteSimple<ui8>(tag); +} + +void TUncheckedSkiffWriter::WriteVariant16Tag(ui16 tag) +{ + WriteSimple<ui16>(tag); +} + +void TUncheckedSkiffWriter::Flush() +{ + Underlying_.UndoRemaining(); + if (BufferedOutput_) { + BufferedOutput_->Flush(); + } +} + +template <typename T> +Y_FORCE_INLINE void TUncheckedSkiffWriter::WriteSimple(T value) +{ + if constexpr (std::is_integral_v<T>) { + value = HostToLittle(value); + Underlying_.Write(&value, sizeof(T)); + } else { + Underlying_.Write(&value, sizeof(T)); + } +} + +void TUncheckedSkiffWriter::Finish() +{ + Flush(); +} + +//////////////////////////////////////////////////////////////////////////////// + +TCheckedSkiffWriter::TCheckedSkiffWriter(const std::shared_ptr<TSkiffSchema>& schema, IZeroCopyOutput* underlying) + : Writer_(underlying) + , Validator_(std::make_unique<TSkiffValidator>(schema)) +{ } + +TCheckedSkiffWriter::TCheckedSkiffWriter(const std::shared_ptr<TSkiffSchema>& schema, IOutputStream* underlying) + : Writer_(underlying) + , Validator_(std::make_unique<TSkiffValidator>(schema)) +{ } + +TCheckedSkiffWriter::~TCheckedSkiffWriter() = default; + +void TCheckedSkiffWriter::WriteDouble(double value) +{ + Validator_->OnSimpleType(EWireType::Double); + Writer_.WriteDouble(value); +} + +void TCheckedSkiffWriter::WriteBoolean(bool value) +{ + Validator_->OnSimpleType(EWireType::Boolean); + Writer_.WriteBoolean(value); +} + +void TCheckedSkiffWriter::WriteInt8(i8 value) +{ + Validator_->OnSimpleType(EWireType::Int8); + Writer_.WriteInt8(value); +} + +void TCheckedSkiffWriter::WriteInt16(i16 value) +{ + 
Validator_->OnSimpleType(EWireType::Int16); + Writer_.WriteInt16(value); +} + +void TCheckedSkiffWriter::WriteInt32(i32 value) +{ + Validator_->OnSimpleType(EWireType::Int32); + Writer_.WriteInt32(value); +} + +void TCheckedSkiffWriter::WriteInt64(i64 value) +{ + Validator_->OnSimpleType(EWireType::Int64); + Writer_.WriteInt64(value); +} + +void TCheckedSkiffWriter::WriteUint8(ui8 value) +{ + Validator_->OnSimpleType(EWireType::Uint8); + Writer_.WriteUint8(value); +} + +void TCheckedSkiffWriter::WriteUint16(ui16 value) +{ + Validator_->OnSimpleType(EWireType::Uint16); + Writer_.WriteUint16(value); +} + +void TCheckedSkiffWriter::WriteUint32(ui32 value) +{ + Validator_->OnSimpleType(EWireType::Uint32); + Writer_.WriteUint32(value); +} + +void TCheckedSkiffWriter::WriteUint64(ui64 value) +{ + Validator_->OnSimpleType(EWireType::Uint64); + Writer_.WriteUint64(value); +} + +void TCheckedSkiffWriter::WriteInt128(TInt128 value) +{ + Validator_->OnSimpleType(EWireType::Int128); + Writer_.WriteInt128(value); +} + +void TCheckedSkiffWriter::WriteUint128(TUint128 value) +{ + Validator_->OnSimpleType(EWireType::Uint128); + Writer_.WriteUint128(value); +} + +void TCheckedSkiffWriter::WriteString32(TStringBuf value) +{ + Validator_->OnSimpleType(EWireType::String32); + Writer_.WriteString32(value); +} + +void TCheckedSkiffWriter::WriteYson32(TStringBuf value) +{ + Validator_->OnSimpleType(EWireType::Yson32); + Writer_.WriteYson32(value); +} + +void TCheckedSkiffWriter::WriteVariant8Tag(ui8 tag) +{ + Validator_->OnVariant8Tag(tag); + Writer_.WriteVariant8Tag(tag); +} + +void TCheckedSkiffWriter::WriteVariant16Tag(ui16 tag) +{ + Validator_->OnVariant16Tag(tag); + Writer_.WriteVariant16Tag(tag); +} + +void TCheckedSkiffWriter::Flush() +{ + Writer_.Flush(); +} + +void TCheckedSkiffWriter::Finish() +{ + Validator_->ValidateFinished(); + Writer_.Finish(); +} + +//////////////////////////////////////////////////////////////////// + +} // namespace NSkiff diff --git 
a/library/cpp/skiff/skiff.h b/library/cpp/skiff/skiff.h new file mode 100644 index 0000000000..183c112700 --- /dev/null +++ b/library/cpp/skiff/skiff.h @@ -0,0 +1,259 @@ +#pragma once + +#include "public.h" + +#include "zerocopy_output_writer.h" + +#include <util/generic/buffer.h> +#include <util/generic/yexception.h> + +#include <util/stream/input.h> +#include <util/stream/output.h> + +namespace NSkiff { + +//////////////////////////////////////////////////////////////////////////////// + +class TSkiffException + : public yexception +{ }; + +//////////////////////////////////////////////////////////////////////////////// + +template <typename T> +constexpr T EndOfSequenceTag() +{ + static_assert(std::is_integral<T>::value && std::is_unsigned<T>::value, "T must be unsigned integer"); + return T(-1); +} + +//////////////////////////////////////////////////////////////////////////////// + +struct TInt128 +{ + ui64 Low = 0; + i64 High = 0; +}; + +struct TUint128 +{ + ui64 Low = 0; + ui64 High = 0; +}; + +bool operator==(TInt128 lhs, TInt128 rhs); +bool operator!=(TInt128 lhs, TInt128 rhs); + +bool operator==(TUint128 lhs, TUint128 rhs); +bool operator!=(TUint128 lhs, TUint128 rhs); + +//////////////////////////////////////////////////////////////////////////////// + +class TUncheckedSkiffParser +{ +public: + explicit TUncheckedSkiffParser(IZeroCopyInput* stream); + TUncheckedSkiffParser(const std::shared_ptr<TSkiffSchema>& schema, IZeroCopyInput* stream); + + i8 ParseInt8(); + i16 ParseInt16(); + i32 ParseInt32(); + i64 ParseInt64(); + + ui8 ParseUint8(); + ui16 ParseUint16(); + ui32 ParseUint32(); + ui64 ParseUint64(); + + TInt128 ParseInt128(); + TUint128 ParseUint128(); + + double ParseDouble(); + + bool ParseBoolean(); + + TStringBuf ParseString32(); + + TStringBuf ParseYson32(); + + ui8 ParseVariant8Tag(); + ui16 ParseVariant16Tag(); + + bool HasMoreData(); + + void ValidateFinished(); + + ui64 GetReadBytesCount() const; + +private: + const void* GetData(size_t 
size); + const void* GetDataViaBuffer(size_t size); + + size_t RemainingBytes() const; + void Advance(size_t size); + void RefillBuffer(); + + template <typename T> + T ParseSimple(); + +private: + IZeroCopyInput* const Underlying_; + + TBuffer Buffer_; + ui64 ReadBytesCount_ = 0; + char* Position_ = nullptr; + char* End_ = nullptr; + bool Exhausted_ = false; +}; + +//////////////////////////////////////////////////////////////////////////////// + +class TCheckedSkiffParser +{ +public: + TCheckedSkiffParser(const std::shared_ptr<TSkiffSchema>& schema, IZeroCopyInput* stream); + ~TCheckedSkiffParser(); + + i8 ParseInt8(); + i16 ParseInt16(); + i32 ParseInt32(); + i64 ParseInt64(); + + ui8 ParseUint8(); + ui16 ParseUint16(); + ui32 ParseUint32(); + ui64 ParseUint64(); + + TInt128 ParseInt128(); + TUint128 ParseUint128(); + + double ParseDouble(); + + bool ParseBoolean(); + + TStringBuf ParseString32(); + + TStringBuf ParseYson32(); + + ui8 ParseVariant8Tag(); + ui16 ParseVariant16Tag(); + + bool HasMoreData(); + + void ValidateFinished(); + + ui64 GetReadBytesCount() const; + +private: + TUncheckedSkiffParser Parser_; + std::unique_ptr<TSkiffValidator> Validator_; +}; + +//////////////////////////////////////////////////////////////////// + +class TUncheckedSkiffWriter +{ +public: + explicit TUncheckedSkiffWriter(IZeroCopyOutput* underlying); + explicit TUncheckedSkiffWriter(IOutputStream* underlying); + TUncheckedSkiffWriter(const std::shared_ptr<TSkiffSchema>& schema, IZeroCopyOutput* underlying); + TUncheckedSkiffWriter(const std::shared_ptr<TSkiffSchema>& schema, IOutputStream* underlying); + + ~TUncheckedSkiffWriter(); + + void WriteDouble(double value); + void WriteBoolean(bool value); + + void WriteInt8(i8 value); + void WriteInt16(i16 value); + void WriteInt32(i32 value); + void WriteInt64(i64 value); + + void WriteUint8(ui8 value); + void WriteUint16(ui16 value); + void WriteUint32(ui32 value); + void WriteUint64(ui64 value); + + void WriteInt128(TInt128 
value); + void WriteUint128(TUint128 value); + + void WriteString32(TStringBuf value); + + void WriteYson32(TStringBuf value); + + void WriteVariant8Tag(ui8 tag); + void WriteVariant16Tag(ui16 tag); + + void Flush(); + void Finish(); + +private: + + template <typename T> + void WriteSimple(T data); + +private: + THolder<TBufferedOutput> BufferedOutput_; + TZeroCopyOutputStreamWriter Underlying_; +}; + +//////////////////////////////////////////////////////////////////////////////// + +class TCheckedSkiffWriter +{ +public: + TCheckedSkiffWriter(const std::shared_ptr<TSkiffSchema>& schema, IZeroCopyOutput* underlying); + TCheckedSkiffWriter(const std::shared_ptr<TSkiffSchema>& schema, IOutputStream* underlying); + + ~TCheckedSkiffWriter(); + + void WriteInt8(i8 value); + void WriteInt16(i16 value); + void WriteInt32(i32 value); + void WriteInt64(i64 value); + + void WriteUint8(ui8 value); + void WriteUint16(ui16 value); + void WriteUint32(ui32 value); + void WriteUint64(ui64 value); + + void WriteDouble(double value); + void WriteBoolean(bool value); + + void WriteInt128(TInt128 value); + void WriteUint128(TUint128 value); + + void WriteString32(TStringBuf value); + + void WriteYson32(TStringBuf value); + + void WriteVariant8Tag(ui8 tag); + void WriteVariant16Tag(ui16 tag); + + void Flush(); + void Finish(); + +private: + TUncheckedSkiffWriter Writer_; + std::unique_ptr<TSkiffValidator> Validator_; +}; + +//////////////////////////////////////////////////////////////////////////////// + +template <EWireType wireType> +class TUnderlyingIntegerType { +private: + TUnderlyingIntegerType() = default; + static constexpr auto F(); + +public: + using TValue = decltype(TUnderlyingIntegerType::F()); +}; + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NSkiff + +#define SKIFF_H +#include "skiff-inl.h" +#undef SKIFF_H diff --git a/library/cpp/skiff/skiff_schema-inl.h b/library/cpp/skiff/skiff_schema-inl.h new file mode 100644 
index 0000000000..d66325b222 --- /dev/null +++ b/library/cpp/skiff/skiff_schema-inl.h @@ -0,0 +1,61 @@ +#pragma once + +#ifndef SKIFF_SCHEMA_H +#error "Direct inclusion of this file is not allowed, include skiff_schema.h" +// For the sake of sane code completion. +#include "skiff_schema.h" +#endif +#undef SKIFF_SCHEMA_H + +namespace NSkiff { + +//////////////////////////////////////////////////////////////////////////////// + +inline bool IsSimpleType(EWireType type) +{ + switch (type) { + case EWireType::Int8: + case EWireType::Int16: + case EWireType::Int32: + case EWireType::Int64: + case EWireType::Int128: + + case EWireType::Uint8: + case EWireType::Uint16: + case EWireType::Uint32: + case EWireType::Uint64: + case EWireType::Uint128: + + case EWireType::Double: + case EWireType::Boolean: + case EWireType::String32: + case EWireType::Yson32: + case EWireType::Nothing: + return true; + case EWireType::Tuple: + case EWireType::Variant8: + case EWireType::Variant16: + case EWireType::RepeatedVariant8: + case EWireType::RepeatedVariant16: + return false; + } + Y_FAIL(); +} + +//////////////////////////////////////////////////////////////////////////////// + +template <EWireType WireType> +TComplexSchema<WireType>::TComplexSchema(TSkiffSchemaList elements) + : TSkiffSchema(WireType) + , Elements_(std::move(elements)) +{ } + +template <EWireType WireType> +const TSkiffSchemaList& TComplexSchema<WireType>::GetChildren() const +{ + return Elements_; +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NSkiff diff --git a/library/cpp/skiff/skiff_schema.cpp b/library/cpp/skiff/skiff_schema.cpp new file mode 100644 index 0000000000..c762896ad0 --- /dev/null +++ b/library/cpp/skiff/skiff_schema.cpp @@ -0,0 +1,164 @@ +#include "skiff_schema.h" + +#include "skiff.h" + +#include <util/generic/hash.h> + +namespace NSkiff { + +//////////////////////////////////////////////////////////////////////////////// + +bool 
operator==(const TSkiffSchema& lhs, const TSkiffSchema& rhs) +{ + if (lhs.GetWireType() != rhs.GetWireType() || lhs.GetName() != rhs.GetName()) { + return false; + } + const auto& lhsChildren = lhs.GetChildren(); + const auto& rhsChildren = rhs.GetChildren(); + return std::equal( + std::begin(lhsChildren), + std::end(lhsChildren), + std::begin(rhsChildren), + std::end(rhsChildren), + TSkiffSchemaPtrEqual()); +} + +bool operator!=(const TSkiffSchema& lhs, const TSkiffSchema& rhs) +{ + return !(lhs == rhs); +} + +//////////////////////////////////////////////////////////////////////////////// + +void PrintShortDebugString(const std::shared_ptr<const TSkiffSchema>& schema, IOutputStream* out) +{ + (*out) << ToString(schema->GetWireType()); + if (!IsSimpleType(schema->GetWireType())) { + auto children = schema->GetChildren(); + if (!children.empty()) { + (*out) << '<'; + for (const auto& child : children) { + PrintShortDebugString(child, out); + (*out) << ';'; + } + (*out) << '>'; + } + } +} + +TString GetShortDebugString(const std::shared_ptr<const TSkiffSchema>& schema) +{ + TStringStream out; + PrintShortDebugString(schema, &out); + return out.Str(); +} + +std::shared_ptr<TSimpleTypeSchema> CreateSimpleTypeSchema(EWireType type) +{ + return std::make_shared<TSimpleTypeSchema>(type); +} + +static void VerifyNonemptyChildren(const TSkiffSchemaList& children, EWireType wireType) +{ + if (children.empty()) { + ythrow TSkiffException() << "\"" << ToString(wireType) << "\" must have at least one child"; + } +} + +std::shared_ptr<TTupleSchema> CreateTupleSchema(TSkiffSchemaList children) +{ + return std::make_shared<TTupleSchema>(std::move(children)); +} + +std::shared_ptr<TVariant8Schema> CreateVariant8Schema(TSkiffSchemaList children) +{ + VerifyNonemptyChildren(children, EWireType::Variant8); + return std::make_shared<TVariant8Schema>(std::move(children)); +} + +std::shared_ptr<TVariant16Schema> CreateVariant16Schema(TSkiffSchemaList children) +{ + 
VerifyNonemptyChildren(children, EWireType::Variant16); + return std::make_shared<TVariant16Schema>(std::move(children)); +} + +std::shared_ptr<TRepeatedVariant8Schema> CreateRepeatedVariant8Schema(TSkiffSchemaList children) +{ + VerifyNonemptyChildren(children, EWireType::RepeatedVariant8); + return std::make_shared<TRepeatedVariant8Schema>(std::move(children)); +} + +std::shared_ptr<TRepeatedVariant16Schema> CreateRepeatedVariant16Schema(TSkiffSchemaList children) +{ + VerifyNonemptyChildren(children, EWireType::RepeatedVariant16); + return std::make_shared<TRepeatedVariant16Schema>(std::move(children)); +} + +//////////////////////////////////////////////////////////////////////////////// + +TSkiffSchema::TSkiffSchema(EWireType type) + : Type_(type) +{ } + +EWireType TSkiffSchema::GetWireType() const +{ + return Type_; +} + +std::shared_ptr<TSkiffSchema> TSkiffSchema::SetName(TString name) +{ + Name_ = std::move(name); + return shared_from_this(); +} + +const TString& TSkiffSchema::GetName() const +{ + return Name_; +} + +const TSkiffSchemaList& TSkiffSchema::GetChildren() const +{ + static const TSkiffSchemaList children; + return children; +} + +//////////////////////////////////////////////////////////////////////////////// + +TSimpleTypeSchema::TSimpleTypeSchema(EWireType type) + : TSkiffSchema(type) +{ + Y_VERIFY(IsSimpleType(type)); +} + +//////////////////////////////////////////////////////////////////////////////// + +size_t TSkiffSchemaPtrHasher::operator()(const std::shared_ptr<TSkiffSchema>& schema) const +{ + return THash<NSkiff::TSkiffSchema>()(*schema); +} + +size_t TSkiffSchemaPtrEqual::operator()( + const std::shared_ptr<TSkiffSchema>& lhs, + const std::shared_ptr<TSkiffSchema>& rhs) const +{ + return *lhs == *rhs; +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NSkiff + +//////////////////////////////////////////////////////////////////////////////// + +size_t 
THash<NSkiff::TSkiffSchema>::operator()(const NSkiff::TSkiffSchema &schema) const +{ + auto hash = CombineHashes( + THash<TString>()(schema.GetName()), + static_cast<size_t>(schema.GetWireType())); + for (const auto& child : schema.GetChildren()) { + hash = CombineHashes(hash, (*this)(*child)); + } + return hash; +} + +//////////////////////////////////////////////////////////////////////////////// diff --git a/library/cpp/skiff/skiff_schema.h b/library/cpp/skiff/skiff_schema.h new file mode 100644 index 0000000000..8952a84bac --- /dev/null +++ b/library/cpp/skiff/skiff_schema.h @@ -0,0 +1,121 @@ +#pragma once + +#include "public.h" + +#include <util/generic/string.h> +#include <util/string/cast.h> + +#include <vector> + +namespace NSkiff { + +//////////////////////////////////////////////////////////////////////////////// + +template <EWireType WireType> +class TComplexSchema; + +using TTupleSchema = TComplexSchema<EWireType::Tuple>; +using TVariant8Schema = TComplexSchema<EWireType::Variant8>; +using TVariant16Schema = TComplexSchema<EWireType::Variant16>; +using TRepeatedVariant8Schema = TComplexSchema<EWireType::RepeatedVariant8>; +using TRepeatedVariant16Schema = TComplexSchema<EWireType::RepeatedVariant16>; + +using TTupleSchemaPtr = std::shared_ptr<TTupleSchema>; +using TVariant8SchemaPtr = std::shared_ptr<TVariant8Schema>; +using TVariant16SchemaPtr = std::shared_ptr<TVariant16Schema>; +using TRepeatedVariant8SchemaPtr = std::shared_ptr<TRepeatedVariant8Schema>; +using TRepeatedVariant16SchemaPtr = std::shared_ptr<TRepeatedVariant16Schema>; + + + +//////////////////////////////////////////////////////////////////////////////// + +class TSkiffSchema + : public std::enable_shared_from_this<TSkiffSchema> +{ +public: + virtual ~TSkiffSchema() = default; + + EWireType GetWireType() const; + std::shared_ptr<TSkiffSchema> SetName(TString name); + const TString& GetName() const; + + virtual const TSkiffSchemaList& GetChildren() const; + +protected: + explicit 
TSkiffSchema(EWireType type); + +private: + const EWireType Type_; + TString Name_; +}; + +bool operator==(const TSkiffSchema& lhs, const TSkiffSchema& rhs); +bool operator!=(const TSkiffSchema& lhs, const TSkiffSchema& rhs); + +//////////////////////////////////////////////////////////////////////////////// + +class TSimpleTypeSchema + : public TSkiffSchema +{ +public: + explicit TSimpleTypeSchema(EWireType type); +}; + +//////////////////////////////////////////////////////////////////////////////// + +template <EWireType WireType> +class TComplexSchema + : public TSkiffSchema +{ +public: + explicit TComplexSchema(TSkiffSchemaList elements); + + virtual const TSkiffSchemaList& GetChildren() const override; + +private: + const TSkiffSchemaList Elements_; +}; + +//////////////////////////////////////////////////////////////////////////////// + +bool IsSimpleType(EWireType type); +TString GetShortDebugString(const std::shared_ptr<const TSkiffSchema>& schema); +void PrintShortDebugString(const std::shared_ptr<const TSkiffSchema>& schema, IOutputStream* out); + +std::shared_ptr<TSimpleTypeSchema> CreateSimpleTypeSchema(EWireType type); +std::shared_ptr<TTupleSchema> CreateTupleSchema(TSkiffSchemaList children); +std::shared_ptr<TVariant8Schema> CreateVariant8Schema(TSkiffSchemaList children); +std::shared_ptr<TVariant16Schema> CreateVariant16Schema(TSkiffSchemaList children); +std::shared_ptr<TRepeatedVariant8Schema> CreateRepeatedVariant8Schema(TSkiffSchemaList children); +std::shared_ptr<TRepeatedVariant16Schema> CreateRepeatedVariant16Schema(TSkiffSchemaList children); + +//////////////////////////////////////////////////////////////////////////////// + +struct TSkiffSchemaPtrHasher +{ + size_t operator()(const std::shared_ptr<TSkiffSchema>& schema) const; +}; + +struct TSkiffSchemaPtrEqual +{ + size_t operator()( + const std::shared_ptr<TSkiffSchema>& lhs, + const std::shared_ptr<TSkiffSchema>& rhs) const; +}; + +} // namespace NSkiff + 
+//////////////////////////////////////////////////////////////////////////////// + +template <> +struct THash<NSkiff::TSkiffSchema> +{ + size_t operator()(const NSkiff::TSkiffSchema& schema) const; +}; + +//////////////////////////////////////////////////////////////////////////////// + +#define SKIFF_SCHEMA_H +#include "skiff_schema-inl.h" +#undef SKIFF_SCHEMA_H diff --git a/library/cpp/skiff/skiff_validator.cpp b/library/cpp/skiff/skiff_validator.cpp new file mode 100644 index 0000000000..1b1b98d5a6 --- /dev/null +++ b/library/cpp/skiff/skiff_validator.cpp @@ -0,0 +1,396 @@ +#include "skiff.h" +#include "skiff_validator.h" + +#include <vector> +#include <stack> + +namespace NSkiff { + +//////////////////////////////////////////////////////////////////////////////// + +struct IValidatorNode; + +using TValidatorNodeList = std::vector<std::shared_ptr<IValidatorNode>>; +using TSkiffSchemaList = std::vector<std::shared_ptr<TSkiffSchema>>; + +static std::shared_ptr<IValidatorNode> CreateUsageValidatorNode(const std::shared_ptr<TSkiffSchema>& skiffSchema); +static TValidatorNodeList CreateUsageValidatorNodeList(const TSkiffSchemaList& skiffSchemaList); + +//////////////////////////////////////////////////////////////////////////////// + +template <typename T> +inline void ThrowUnexpectedParseWrite(T wireType) +{ + ythrow TSkiffException() << "Unexpected parse/write of \"" << ::ToString(wireType) << "\" token"; +} + +//////////////////////////////////////////////////////////////////////////////// + +struct IValidatorNode +{ + virtual ~IValidatorNode() = default; + + virtual void OnBegin(TValidatorNodeStack* /*validatorNodeStack*/) + { } + + virtual void OnChildDone(TValidatorNodeStack* /*validatorNodeStack*/) + { + Y_FAIL(); + } + + virtual void OnSimpleType(TValidatorNodeStack* /*validatorNodeStack*/, EWireType wireType) + { + ThrowUnexpectedParseWrite(wireType); + } + + virtual void BeforeVariant8Tag() + { + ThrowUnexpectedParseWrite(EWireType::Variant8); + } + + 
virtual void OnVariant8Tag(TValidatorNodeStack* /*validatorNodeStack*/, ui8 /*tag*/) + { + IValidatorNode::BeforeVariant8Tag(); + } + + virtual void BeforeVariant16Tag() + { + ThrowUnexpectedParseWrite(EWireType::Variant16); + } + + virtual void OnVariant16Tag(TValidatorNodeStack* /*validatorNodeStack*/, ui16 /*tag*/) + { + IValidatorNode::BeforeVariant16Tag(); + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +class TValidatorNodeStack +{ +public: + explicit TValidatorNodeStack(std::shared_ptr<IValidatorNode> validator) + : RootValidator_(std::move(validator)) + { } + + void PushValidator(IValidatorNode* validator) + { + ValidatorStack_.push(validator); + validator->OnBegin(this); + } + + void PopValidator() + { + Y_VERIFY(!ValidatorStack_.empty()); + ValidatorStack_.pop(); + if (!ValidatorStack_.empty()) { + ValidatorStack_.top()->OnChildDone(this); + } + } + + void PushRootIfRequired() + { + if (ValidatorStack_.empty()) { + PushValidator(RootValidator_.get()); + } + } + + IValidatorNode* Top() const + { + Y_VERIFY(!ValidatorStack_.empty()); + return ValidatorStack_.top(); + } + + bool IsFinished() const + { + return ValidatorStack_.empty(); + } + +private: + const std::shared_ptr<IValidatorNode> RootValidator_; + std::stack<IValidatorNode*> ValidatorStack_; +}; + +//////////////////////////////////////////////////////////////////////////////// + +class TNothingTypeValidator + : public IValidatorNode +{ +public: + void OnBegin(TValidatorNodeStack* validatorNodeStack) override + { + validatorNodeStack->PopValidator(); + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +class TSimpleTypeUsageValidator + : public IValidatorNode +{ +public: + explicit TSimpleTypeUsageValidator(EWireType type) + : Type_(type) + { } + + void OnSimpleType(TValidatorNodeStack* validatorNodeStack, EWireType type) override + { + if (type != Type_) { + ThrowUnexpectedParseWrite(type); + } + 
validatorNodeStack->PopValidator(); + } + +private: + const EWireType Type_; +}; + +//////////////////////////////////////////////////////////////////////////////// + +template <typename TTag> +void ValidateVariantTag(TValidatorNodeStack* validatorNodeStack, TTag tag, const TValidatorNodeList& children) +{ + if (tag == EndOfSequenceTag<TTag>()) { + // Root validator is pushed into the stack before variant tag + // if the stack is empty. + validatorNodeStack->PopValidator(); + } else if (tag >= children.size()) { + ythrow TSkiffException() << "Variant tag \"" << tag << "\" " + << "exceeds number of children \"" << children.size(); + } else { + validatorNodeStack->PushValidator(children[tag].get()); + } +} + +class TVariant8TypeUsageValidator + : public IValidatorNode +{ +public: + explicit TVariant8TypeUsageValidator(TValidatorNodeList children) + : Children_(std::move(children)) + { } + + void BeforeVariant8Tag() override + { } + + void OnVariant8Tag(TValidatorNodeStack* validatorNodeStack, ui8 tag) override + { + ValidateVariantTag(validatorNodeStack, tag, Children_); + } + + void OnChildDone(TValidatorNodeStack* validatorNodeStack) override + { + validatorNodeStack->PopValidator(); + } + +private: + const TValidatorNodeList Children_; +}; + +//////////////////////////////////////////////////////////////////////////////// + +class TVariant16TypeUsageValidator + : public IValidatorNode +{ +public: + explicit TVariant16TypeUsageValidator(TValidatorNodeList children) + : Children_(std::move(children)) + { } + + void BeforeVariant16Tag() override + { } + + void OnVariant16Tag(TValidatorNodeStack* validatorNodeStack, ui16 tag) override + { + ValidateVariantTag(validatorNodeStack, tag, Children_); + } + + void OnChildDone(TValidatorNodeStack* validatorNodeStack) override + { + validatorNodeStack->PopValidator(); + } + +private: + const TValidatorNodeList Children_; +}; + +//////////////////////////////////////////////////////////////////////////////// + +class 
TRepeatedVariant8TypeUsageValidator + : public IValidatorNode +{ +public: + explicit TRepeatedVariant8TypeUsageValidator(TValidatorNodeList children) + : Children_(std::move(children)) + { } + + void BeforeVariant8Tag() override + { } + + void OnVariant8Tag(TValidatorNodeStack* validatorNodeStack, ui8 tag) override + { + ValidateVariantTag(validatorNodeStack, tag, Children_); + } + + void OnChildDone(TValidatorNodeStack* /*validatorNodeStack*/) override + { } + +private: + const TValidatorNodeList Children_; +}; + +//////////////////////////////////////////////////////////////////////////////// + +class TRepeatedVariant16TypeUsageValidator + : public IValidatorNode +{ +public: + explicit TRepeatedVariant16TypeUsageValidator(TValidatorNodeList children) + : Children_(std::move(children)) + { } + + void BeforeVariant16Tag() override + { } + + void OnVariant16Tag(TValidatorNodeStack* validatorNodeStack, ui16 tag) override + { + ValidateVariantTag(validatorNodeStack, tag, Children_); + } + + void OnChildDone(TValidatorNodeStack* /*validatorNodeStack*/) override + { } + +private: + const TValidatorNodeList Children_; +}; + +//////////////////////////////////////////////////////////////////////////////// + +class TTupleTypeUsageValidator + : public IValidatorNode +{ +public: + explicit TTupleTypeUsageValidator(TValidatorNodeList children) + : Children_(std::move(children)) + { } + + void OnBegin(TValidatorNodeStack* validatorNodeStack) override + { + Position_ = 0; + if (!Children_.empty()) { + validatorNodeStack->PushValidator(Children_[0].get()); + } + } + + void OnChildDone(TValidatorNodeStack* validatorNodeStack) override + { + Position_++; + if (Position_ < Children_.size()) { + validatorNodeStack->PushValidator(Children_[Position_].get()); + } else { + validatorNodeStack->PopValidator(); + } + } + +private: + const TValidatorNodeList Children_; + ui32 Position_ = 0; +}; + +//////////////////////////////////////////////////////////////////////////////// + 
+TSkiffValidator::TSkiffValidator(std::shared_ptr<TSkiffSchema> skiffSchema) + : Context_(std::make_unique<TValidatorNodeStack>(CreateUsageValidatorNode(std::move(skiffSchema)))) +{ } + +TSkiffValidator::~TSkiffValidator() +{ } + +void TSkiffValidator::BeforeVariant8Tag() +{ + Context_->PushRootIfRequired(); + Context_->Top()->BeforeVariant8Tag(); +} + +void TSkiffValidator::OnVariant8Tag(ui8 tag) +{ + Context_->PushRootIfRequired(); + Context_->Top()->OnVariant8Tag(Context_.get(), tag); +} + +void TSkiffValidator::BeforeVariant16Tag() +{ + Context_->PushRootIfRequired(); + Context_->Top()->BeforeVariant16Tag(); +} + +void TSkiffValidator::OnVariant16Tag(ui16 tag) +{ + Context_->PushRootIfRequired(); + Context_->Top()->OnVariant16Tag(Context_.get(), tag); +} + +void TSkiffValidator::OnSimpleType(EWireType value) +{ + Context_->PushRootIfRequired(); + Context_->Top()->OnSimpleType(Context_.get(), value); +} + +void TSkiffValidator::ValidateFinished() +{ + if (!Context_->IsFinished()) { + ythrow TSkiffException() << "Parse/write is not finished"; + } +} + +//////////////////////////////////////////////////////////////////////////////// + +TValidatorNodeList CreateUsageValidatorNodeList(const TSkiffSchemaList& skiffSchemaList) +{ + TValidatorNodeList result; + result.reserve(skiffSchemaList.size()); + for (const auto& skiffSchema : skiffSchemaList) { + result.push_back(CreateUsageValidatorNode(skiffSchema)); + } + return result; +} + +std::shared_ptr<IValidatorNode> CreateUsageValidatorNode(const std::shared_ptr<TSkiffSchema>& skiffSchema) +{ + switch (skiffSchema->GetWireType()) { + case EWireType::Int8: + case EWireType::Int16: + case EWireType::Int32: + case EWireType::Int64: + case EWireType::Int128: + + case EWireType::Uint8: + case EWireType::Uint16: + case EWireType::Uint32: + case EWireType::Uint64: + case EWireType::Uint128: + + case EWireType::Double: + case EWireType::Boolean: + case EWireType::String32: + case EWireType::Yson32: + return 
std::make_shared<TSimpleTypeUsageValidator>(skiffSchema->GetWireType()); + case EWireType::Nothing: + return std::make_shared<TNothingTypeValidator>(); + case EWireType::Tuple: + return std::make_shared<TTupleTypeUsageValidator>(CreateUsageValidatorNodeList(skiffSchema->GetChildren())); + case EWireType::Variant8: + return std::make_shared<TVariant8TypeUsageValidator>(CreateUsageValidatorNodeList(skiffSchema->GetChildren())); + case EWireType::Variant16: + return std::make_shared<TVariant16TypeUsageValidator>(CreateUsageValidatorNodeList(skiffSchema->GetChildren())); + case EWireType::RepeatedVariant8: + return std::make_shared<TRepeatedVariant8TypeUsageValidator>(CreateUsageValidatorNodeList(skiffSchema->GetChildren())); + case EWireType::RepeatedVariant16: + return std::make_shared<TRepeatedVariant16TypeUsageValidator>(CreateUsageValidatorNodeList(skiffSchema->GetChildren())); + } + Y_FAIL(); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NSkiff diff --git a/library/cpp/skiff/skiff_validator.h b/library/cpp/skiff/skiff_validator.h new file mode 100644 index 0000000000..522cc74db6 --- /dev/null +++ b/library/cpp/skiff/skiff_validator.h @@ -0,0 +1,39 @@ +#pragma once + +#include "public.h" + +#include "skiff_schema.h" + +#include <util/string/cast.h> + +namespace NSkiff { + +//////////////////////////////////////////////////////////////////////////////// + +class TValidatorNodeStack; + +//////////////////////////////////////////////////////////////////////////////// + +class TSkiffValidator +{ +public: + explicit TSkiffValidator(std::shared_ptr<TSkiffSchema> skiffSchema); + ~TSkiffValidator(); + + void BeforeVariant8Tag(); + void OnVariant8Tag(ui8 tag); + + void BeforeVariant16Tag(); + void OnVariant16Tag(ui16 tag); + + void OnSimpleType(EWireType value); + + void ValidateFinished(); + +private: + const std::unique_ptr<TValidatorNodeStack> Context_; +}; + 
+//////////////////////////////////////////////////////////////////////////////// + +} // namespace NSkiff diff --git a/library/cpp/skiff/unittests/skiff_schema_ut.cpp b/library/cpp/skiff/unittests/skiff_schema_ut.cpp new file mode 100644 index 0000000000..c20a560dfc --- /dev/null +++ b/library/cpp/skiff/unittests/skiff_schema_ut.cpp @@ -0,0 +1,148 @@ +#include <library/cpp/skiff/skiff.h> +#include <library/cpp/skiff/skiff_schema.h> + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/stream/buffer.h> + +using namespace NSkiff; + +//////////////////////////////////////////////////////////////////////////////// + +template<> +void Out<TSkiffSchema>(IOutputStream& s, const TSkiffSchema& schema) +{ + s << "TSkiffSchema:" << GetShortDebugString(schema.shared_from_this()); +} + +//////////////////////////////////////////////////////////////////////////////// + +Y_UNIT_TEST_SUITE(TSkiffSchemaTestSuite) { + Y_UNIT_TEST(TestIntEqual) + { + std::shared_ptr<TSkiffSchema> schema1 = CreateSimpleTypeSchema(EWireType::Uint64); + schema1->SetName("schema"); + + std::shared_ptr<TSkiffSchema> schema2 = CreateSimpleTypeSchema(EWireType::Uint64); + schema2->SetName("schema"); + + UNIT_ASSERT_VALUES_EQUAL(*schema1, *schema2); + } + + Y_UNIT_TEST(TestTupleEqual) + { + std::shared_ptr<TSkiffSchema> schema1 = CreateTupleSchema({ + CreateSimpleTypeSchema(EWireType::Int64), + CreateSimpleTypeSchema(EWireType::String32), + }); + + std::shared_ptr<TSkiffSchema> schema2 = CreateTupleSchema({ + CreateSimpleTypeSchema(EWireType::Int64), + CreateSimpleTypeSchema(EWireType::String32), + }); + + Cerr << *schema1 << Endl; + + schema1->SetName("schema"); + UNIT_ASSERT_VALUES_UNEQUAL(*schema1, *schema2); + + schema2->SetName("schema"); + UNIT_ASSERT_VALUES_EQUAL(*schema1, *schema2); + } + + Y_UNIT_TEST(TestHashes) + { + TSet<size_t> hashes; + + auto schema = CreateSimpleTypeSchema(EWireType::Uint64); + schema->SetName("schema"); + 
hashes.insert(THash<NSkiff::TSkiffSchema>()(*schema)); + + schema = CreateSimpleTypeSchema(EWireType::Uint64); + hashes.insert(THash<NSkiff::TSkiffSchema>()(*schema)); + + auto schema2 = CreateTupleSchema({ + CreateSimpleTypeSchema(EWireType::Int64), + CreateSimpleTypeSchema(EWireType::String32), + }); + schema2->SetName("s"); + hashes.insert(THash<NSkiff::TSkiffSchema>()(*schema2)); + + schema2->SetName("s0"); + hashes.insert(THash<NSkiff::TSkiffSchema>()(*schema2)); + + schema2->SetName("s"); + hashes.insert(THash<NSkiff::TSkiffSchema>()(*schema2)); + + auto schema3 = CreateRepeatedVariant16Schema({ + CreateSimpleTypeSchema(EWireType::Int64), + schema2, + }); + hashes.insert(THash<NSkiff::TSkiffSchema>()(*schema3)); + + schema3->SetName("kek"); + hashes.insert(THash<NSkiff::TSkiffSchema>()(*schema3)); + + auto schema4 = CreateRepeatedVariant8Schema({ + CreateSimpleTypeSchema(EWireType::Int64), + schema2, + }); + hashes.insert(THash<NSkiff::TSkiffSchema>()(*schema4)); + + schema4->SetName("kek"); + hashes.insert(THash<NSkiff::TSkiffSchema>()(*schema4)); + + UNIT_ASSERT_VALUES_EQUAL(hashes.size(), 8); + } + + Y_UNIT_TEST(TestDifferent) + { + TVector<std::shared_ptr<TSkiffSchema>> schemas; + + auto schema = CreateSimpleTypeSchema(EWireType::Uint64); + schema->SetName("schema"); + schemas.push_back(schema); + schemas.push_back(CreateSimpleTypeSchema(EWireType::Uint64)); + + auto schema2 = CreateTupleSchema({ + CreateSimpleTypeSchema(EWireType::Int64), + CreateSimpleTypeSchema(EWireType::String32), + }); + schema2->SetName("s"); + schemas.push_back(schema2); + + auto schema3 = CreateTupleSchema({ + CreateSimpleTypeSchema(EWireType::Int64), + CreateSimpleTypeSchema(EWireType::String32), + }); + schema3->SetName("s0"); + schemas.push_back(schema3); + + auto schema4 = CreateRepeatedVariant16Schema({ + CreateSimpleTypeSchema(EWireType::Int64), + schema2, + }); + schemas.push_back(schema4); + + auto schema5 = CreateRepeatedVariant16Schema({ + 
CreateSimpleTypeSchema(EWireType::Int64), + schema2, + }); + schema5->SetName("kek"); + schemas.push_back(schema5); + + auto schema6 = CreateRepeatedVariant8Schema({ + CreateSimpleTypeSchema(EWireType::Int64), + schema2, + }); + schemas.push_back(schema6); + + for (size_t i = 0; i < schemas.size(); ++i) { + for (size_t j = i + 1; j < schemas.size(); ++j) { + UNIT_ASSERT_VALUES_UNEQUAL(*schemas[i], *schemas[j]); + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////// diff --git a/library/cpp/skiff/unittests/skiff_ut.cpp b/library/cpp/skiff/unittests/skiff_ut.cpp new file mode 100644 index 0000000000..5e4c709611 --- /dev/null +++ b/library/cpp/skiff/unittests/skiff_ut.cpp @@ -0,0 +1,627 @@ +#include <library/cpp/testing/unittest/registar.h> + +#include <library/cpp/skiff/skiff.h> +#include <library/cpp/skiff/skiff_schema.h> + +#include <util/stream/buffer.h> +#include <util/string/hex.h> + +using namespace NSkiff; + +//////////////////////////////////////////////////////////////////////////////// + +static TString HexEncode(const TBuffer& buffer) +{ + auto result = HexEncode(buffer.Data(), buffer.Size()); + result.to_lower(); + return result; +} + +Y_UNIT_TEST_SUITE(Skiff) +{ + Y_UNIT_TEST(TestInt8) + { + TBufferStream bufferStream; + + auto schema = CreateSimpleTypeSchema(EWireType::Int8); + + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + tokenWriter.WriteInt8(42); + tokenWriter.WriteInt8(-42); + tokenWriter.Finish(); + + UNIT_ASSERT_VALUES_EQUAL(HexEncode(bufferStream.Buffer()), + "2a" + "d6"); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseInt8(), 42); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseInt8(), -42); + } + + Y_UNIT_TEST(TestInt16) + { + TBufferStream bufferStream; + + auto schema = CreateSimpleTypeSchema(EWireType::Int16); + + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + tokenWriter.WriteInt16(0x1234); + tokenWriter.WriteInt16(-0x1234); + 
tokenWriter.Finish(); + + UNIT_ASSERT_VALUES_EQUAL(HexEncode(bufferStream.Buffer()), + "3412" + "cced"); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseInt16(), 0x1234); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseInt16(), -0x1234); + } + + Y_UNIT_TEST(TestInt32) + { + TBufferStream bufferStream; + + auto schema = CreateSimpleTypeSchema(EWireType::Int32); + + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + tokenWriter.WriteInt32(0x12345678); + tokenWriter.WriteInt32(-0x12345678); + tokenWriter.Finish(); + + UNIT_ASSERT_VALUES_EQUAL(HexEncode(bufferStream.Buffer()), + "78563412" + "88a9cbed"); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseInt32(), 0x12345678); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseInt32(), -0x12345678); + } + + Y_UNIT_TEST(TestInt64) + { + TBufferStream bufferStream; + + auto schema = CreateSimpleTypeSchema(EWireType::Int64); + + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + tokenWriter.WriteInt64(-42); + tokenWriter.WriteInt64(100500); + tokenWriter.WriteInt64(-0x123456789abcdef0); + tokenWriter.Finish(); + + UNIT_ASSERT_VALUES_EQUAL(HexEncode(bufferStream.Buffer()), + "d6ffffffffffffff" + "9488010000000000" + "1021436587a9cbed"); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseInt64(), -42); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseInt64(), 100500); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseInt64(), -0x123456789abcdef0); + } + + Y_UNIT_TEST(TestUint8) + { + TBufferStream bufferStream; + + auto schema = CreateSimpleTypeSchema(EWireType::Uint8); + + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + tokenWriter.WriteUint8(42); + tokenWriter.WriteUint8(200); + tokenWriter.Finish(); + + UNIT_ASSERT_VALUES_EQUAL(HexEncode(bufferStream.Buffer()), + "2a" + "c8"); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseUint8(), 42); + 
UNIT_ASSERT_VALUES_EQUAL(parser.ParseUint8(), 200); + } + + Y_UNIT_TEST(TestUint16) + { + TBufferStream bufferStream; + + auto schema = CreateSimpleTypeSchema(EWireType::Uint16); + + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + tokenWriter.WriteUint16(0x1234); + tokenWriter.WriteUint16(0xfedc); + tokenWriter.Finish(); + + UNIT_ASSERT_VALUES_EQUAL(HexEncode(bufferStream.Buffer()), + "3412" + "dcfe"); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseUint16(), 0x1234); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseUint16(), 0xfedc); + } + + Y_UNIT_TEST(TestUint32) + { + TBufferStream bufferStream; + + auto schema = CreateSimpleTypeSchema(EWireType::Uint32); + + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + tokenWriter.WriteUint32(0x12345678); + tokenWriter.WriteUint32(0x87654321); + tokenWriter.Finish(); + + UNIT_ASSERT_VALUES_EQUAL(HexEncode(bufferStream.Buffer()), + "78563412" + "21436587"); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseUint32(), 0x12345678); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseUint32(), 0x87654321); + } + + + Y_UNIT_TEST(TestUint64) + { + TBufferStream bufferStream; + + auto schema = CreateSimpleTypeSchema(EWireType::Uint64); + + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + tokenWriter.WriteUint64(42); + tokenWriter.WriteUint64(100500); + tokenWriter.WriteUint64(0x123456789abcdef0); + tokenWriter.Finish(); + + UNIT_ASSERT_VALUES_EQUAL(HexEncode(bufferStream.Buffer()), + "2a00000000000000" + "9488010000000000" + "f0debc9a78563412"); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseUint64(), 42); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseUint64(), 100500); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseUint64(), 0x123456789abcdef0); + } + + Y_UNIT_TEST(TestInt128) + { + TBufferStream bufferStream; + + auto schema = CreateSimpleTypeSchema(EWireType::Int128); + + const TInt128 val1 = 
{0x1924cd4aeb9ced82, 0x0885e83f456d6a7e}; + const TInt128 val2 = {0xe9ba36585eccae1a, -0x7854b6f9ce448be9}; + + TCheckedSkiffWriter writer(schema, &bufferStream); + writer.WriteInt128(val1); + writer.WriteInt128(val2); + writer.Finish(); + + UNIT_ASSERT_VALUES_EQUAL(HexEncode(bufferStream.Buffer()), + "82ed9ceb4acd2419" "7e6a6d453fe88508" + "1aaecc5e5836bae9" "1774bb310649ab87"); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_EQUAL(parser.ParseInt128(), val1); + UNIT_ASSERT_EQUAL(parser.ParseInt128(), val2); + } + + Y_UNIT_TEST(TestUint128) + { + TBufferStream bufferStream; + + auto schema = CreateSimpleTypeSchema(EWireType::Uint128); + + const auto val1 = TUint128{0x1924cd4aeb9ced82, 0x0885e83f456d6a7e}; + const auto val2 = TUint128{0xe9ba36585eccae1a, 0x8854b6f9ce448be9}; + + TCheckedSkiffWriter writer(schema, &bufferStream); + writer.WriteUint128(val1); + writer.WriteUint128(val2); + writer.Finish(); + + UNIT_ASSERT_VALUES_EQUAL(HexEncode(bufferStream.Buffer()), + "82ed9ceb4acd2419" "7e6a6d453fe88508" + "1aaecc5e5836bae9" "e98b44cef9b65488"); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_EQUAL(parser.ParseUint128(), val1); + UNIT_ASSERT_EQUAL(parser.ParseUint128(), val2); + } + + Y_UNIT_TEST(TestBoolean) + { + auto schema = CreateSimpleTypeSchema(EWireType::Boolean); + + TBufferStream bufferStream; + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + tokenWriter.WriteBoolean(true); + tokenWriter.WriteBoolean(false); + tokenWriter.Finish(); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseBoolean(), true); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseBoolean(), false); + + { + TBufferStream bufferStream; + bufferStream.Write('\x02'); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_EXCEPTION(parser.ParseBoolean(), std::exception); + } + } + + Y_UNIT_TEST(TestVariant8) + { + auto schema = CreateVariant8Schema({ + 
CreateSimpleTypeSchema(EWireType::Nothing), + CreateSimpleTypeSchema(EWireType::Uint64), + }); + + { + TBufferStream bufferStream; + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + UNIT_ASSERT_EXCEPTION(tokenWriter.WriteUint64(42), std::exception); + } + + { + TBufferStream bufferStream; + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + tokenWriter.WriteVariant8Tag(0); + UNIT_ASSERT_EXCEPTION(tokenWriter.WriteUint64(42), std::exception); + } + { + TBufferStream bufferStream; + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + tokenWriter.WriteVariant8Tag(1); + UNIT_ASSERT_EXCEPTION(tokenWriter.WriteInt64(42), std::exception); + } + + TBufferStream bufferStream; + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + tokenWriter.WriteVariant8Tag(0); + tokenWriter.WriteVariant8Tag(1); + tokenWriter.WriteUint64(42); + tokenWriter.Finish(); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseVariant8Tag(), 0); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseVariant8Tag(), 1); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseUint64(), 42); + + parser.ValidateFinished(); + } + + Y_UNIT_TEST(TestTuple) + { + + auto schema = CreateTupleSchema({ + CreateSimpleTypeSchema(EWireType::Int64), + CreateSimpleTypeSchema(EWireType::String32), + }); + + { + TBufferStream bufferStream; + + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + tokenWriter.WriteInt64(42); + tokenWriter.WriteString32("foobar"); + tokenWriter.Finish(); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseInt64(), 42); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseString32(), "foobar"); + parser.ValidateFinished(); + } + } + + Y_UNIT_TEST(TestString) + { + + auto schema = CreateSimpleTypeSchema(EWireType::String32); + + { + TBufferStream bufferStream; + + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + tokenWriter.WriteString32("foo"); + tokenWriter.Finish(); + + TCheckedSkiffParser parser(schema, 
&bufferStream); + + UNIT_ASSERT_VALUES_EQUAL(parser.ParseString32(), "foo"); + + parser.ValidateFinished(); + } + + { + TBufferStream bufferStream; + + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + tokenWriter.WriteString32("foo"); + tokenWriter.Finish(); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_EXCEPTION(parser.ParseInt64(), std::exception); + } + } + + Y_UNIT_TEST(TestRepeatedVariant8) + { + + auto schema = CreateRepeatedVariant8Schema({ + CreateSimpleTypeSchema(EWireType::Int64), + CreateSimpleTypeSchema(EWireType::Uint64), + }); + + { + TBufferStream bufferStream; + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + + // row 0 + tokenWriter.WriteVariant8Tag(0); + tokenWriter.WriteInt64(-8); + + // row 2 + tokenWriter.WriteVariant8Tag(1); + tokenWriter.WriteUint64(42); + + // end + tokenWriter.WriteVariant8Tag(EndOfSequenceTag<ui8>()); + + tokenWriter.Finish(); + + { + TBufferInput input(bufferStream.Buffer()); + TCheckedSkiffParser parser(schema, &input); + + // row 1 + UNIT_ASSERT_VALUES_EQUAL(parser.ParseVariant8Tag(), 0); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseInt64(), -8); + + // row 2 + UNIT_ASSERT_VALUES_EQUAL(parser.ParseVariant8Tag(), 1); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseUint64(), 42); + + // end + UNIT_ASSERT_VALUES_EQUAL(parser.ParseVariant8Tag(), EndOfSequenceTag<ui8>()); + + parser.ValidateFinished(); + } + + { + TBufferInput input(bufferStream.Buffer()); + TCheckedSkiffParser parser(schema, &input); + + UNIT_ASSERT_EXCEPTION(parser.ParseInt64(), std::exception); + } + + { + TBufferInput input(bufferStream.Buffer()); + TCheckedSkiffParser parser(schema, &input); + + parser.ParseVariant8Tag(); + UNIT_ASSERT_EXCEPTION(parser.ParseUint64(), std::exception); + } + + { + TBufferInput input(bufferStream.Buffer()); + TCheckedSkiffParser parser(schema, &input); + + parser.ParseVariant8Tag(); + parser.ParseInt64(); + + UNIT_ASSERT_EXCEPTION(parser.ValidateFinished(), std::exception); + } + } + + { + 
TBufferStream bufferStream; + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + + tokenWriter.WriteVariant8Tag(0); + UNIT_ASSERT_EXCEPTION(tokenWriter.WriteUint64(5), std::exception); + } + + { + TBufferStream bufferStream; + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + + tokenWriter.WriteVariant8Tag(1); + tokenWriter.WriteUint64(5); + + UNIT_ASSERT_EXCEPTION(tokenWriter.Finish(), std::exception); + } + } + + Y_UNIT_TEST(TestRepeatedVariant16) + { + + auto schema = CreateRepeatedVariant16Schema({ + CreateSimpleTypeSchema(EWireType::Int64), + CreateSimpleTypeSchema(EWireType::Uint64), + }); + + { + TBufferStream bufferStream; + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + + // row 0 + tokenWriter.WriteVariant16Tag(0); + tokenWriter.WriteInt64(-8); + + // row 2 + tokenWriter.WriteVariant16Tag(1); + tokenWriter.WriteUint64(42); + + // end + tokenWriter.WriteVariant16Tag(EndOfSequenceTag<ui16>()); + + tokenWriter.Finish(); + + TCheckedSkiffParser parser(schema, &bufferStream); + + // row 1 + UNIT_ASSERT_VALUES_EQUAL(parser.ParseVariant16Tag(), 0); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseInt64(), -8); + + // row 2 + UNIT_ASSERT_VALUES_EQUAL(parser.ParseVariant16Tag(), 1); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseUint64(), 42); + + // end + UNIT_ASSERT_VALUES_EQUAL(parser.ParseVariant16Tag(), EndOfSequenceTag<ui16>()); + + parser.ValidateFinished(); + } + + { + TBufferStream bufferStream; + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + + tokenWriter.WriteVariant16Tag(0); + UNIT_ASSERT_EXCEPTION(tokenWriter.WriteUint64(5), std::exception); + } + + { + TBufferStream bufferStream; + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + + tokenWriter.WriteVariant16Tag(1); + tokenWriter.WriteUint64(5); + + UNIT_ASSERT_EXCEPTION(tokenWriter.Finish(), std::exception); + } + + { + TBufferStream bufferStream; + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + + // row 0 + tokenWriter.WriteVariant16Tag(0); + 
tokenWriter.WriteInt64(-8); + + // row 2 + tokenWriter.WriteVariant16Tag(1); + tokenWriter.WriteUint64(42); + + // end + tokenWriter.WriteVariant16Tag(EndOfSequenceTag<ui16>()); + + tokenWriter.Finish(); + + { + TBufferInput input(bufferStream.Buffer()); + TCheckedSkiffParser parser(schema, &input); + + UNIT_ASSERT_EXCEPTION(parser.ParseInt64(), std::exception); + } + + { + TBufferInput input(bufferStream.Buffer()); + TCheckedSkiffParser parser(schema, &input); + + parser.ParseVariant16Tag(); + UNIT_ASSERT_EXCEPTION(parser.ParseUint64(), std::exception); + } + + { + TBufferInput input(bufferStream.Buffer()); + TCheckedSkiffParser parser(schema, &input); + + parser.ParseVariant16Tag(); + parser.ParseInt64(); + + UNIT_ASSERT_EXCEPTION(parser.ValidateFinished(), std::exception); + } + } + } + + Y_UNIT_TEST(TestStruct) + { + TBufferStream bufferStream; + + auto schema = CreateRepeatedVariant16Schema( + { + CreateSimpleTypeSchema(EWireType::Nothing), + CreateTupleSchema({ + CreateVariant8Schema({ + CreateSimpleTypeSchema(EWireType::Nothing), + CreateSimpleTypeSchema(EWireType::Int64) + }), + CreateSimpleTypeSchema(EWireType::Uint64), + }) + } + ); + + { + TCheckedSkiffWriter tokenWriter(schema, &bufferStream); + + // row 0 + tokenWriter.WriteVariant16Tag(0); + + // row 1 + tokenWriter.WriteVariant16Tag(1); + tokenWriter.WriteVariant8Tag(0); + tokenWriter.WriteUint64(1); + + // row 2 + tokenWriter.WriteVariant16Tag(1); + tokenWriter.WriteVariant8Tag(1); + tokenWriter.WriteInt64(2); + tokenWriter.WriteUint64(3); + + // end + tokenWriter.WriteVariant16Tag(EndOfSequenceTag<ui16>()); + + tokenWriter.Finish(); + } + + TCheckedSkiffParser parser(schema, &bufferStream); + + // row 0 + UNIT_ASSERT_VALUES_EQUAL(parser.ParseVariant16Tag(), 0); + + // row 1 + UNIT_ASSERT_VALUES_EQUAL(parser.ParseVariant16Tag(), 1); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseVariant8Tag(), 0); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseUint64(), 1); + + // row 2 + 
UNIT_ASSERT_VALUES_EQUAL(parser.ParseVariant16Tag(), 1); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseVariant8Tag(), 1); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseInt64(), 2); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseUint64(), 3); + + // end + UNIT_ASSERT_VALUES_EQUAL(parser.ParseVariant16Tag(), EndOfSequenceTag<ui16>()); + + parser.ValidateFinished(); + } + + Y_UNIT_TEST(TestSimpleOutputStream) + { + TBufferStream bufferStream; + + auto schema = CreateSimpleTypeSchema(EWireType::Int8); + + TCheckedSkiffWriter tokenWriter(schema, static_cast<IOutputStream*>(&bufferStream)); + tokenWriter.WriteInt8(42); + tokenWriter.WriteInt8(-42); + tokenWriter.Finish(); + + UNIT_ASSERT_VALUES_EQUAL(HexEncode(bufferStream.Buffer()), + "2a" + "d6"); + + TCheckedSkiffParser parser(schema, &bufferStream); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseInt8(), 42); + UNIT_ASSERT_VALUES_EQUAL(parser.ParseInt8(), -42); + } +} + +//////////////////////////////////////////////////////////////////////////////// diff --git a/library/cpp/skiff/unittests/ya.make b/library/cpp/skiff/unittests/ya.make new file mode 100644 index 0000000000..d67ca8c618 --- /dev/null +++ b/library/cpp/skiff/unittests/ya.make @@ -0,0 +1,12 @@ +UNITTEST() + +SRCS( + skiff_ut.cpp + skiff_schema_ut.cpp +) + +PEERDIR( + library/cpp/skiff +) + +END() diff --git a/library/cpp/skiff/ya.make b/library/cpp/skiff/ya.make new file mode 100644 index 0000000000..ff3eb55c9f --- /dev/null +++ b/library/cpp/skiff/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +SRCS( + skiff.cpp + skiff_schema.cpp + skiff_validator.cpp + zerocopy_output_writer.cpp +) + +GENERATE_ENUM_SERIALIZATION(public.h) + +END() + +RECURSE_FOR_TESTS( + unittests +) diff --git a/library/cpp/skiff/zerocopy_output_writer-inl.h b/library/cpp/skiff/zerocopy_output_writer-inl.h new file mode 100644 index 0000000000..6bd067c9fa --- /dev/null +++ b/library/cpp/skiff/zerocopy_output_writer-inl.h @@ -0,0 +1,51 @@ +#pragma once +#ifndef ZEROCOPY_OUTPUT_WRITER_INL_H_ +#error "Direct inclusion of this 
file is not allowed, include zerocopy_output_writer.h" +// For the sake of sane code completion. +#include "zerocopy_output_writer.h" +#endif + +#include <util/system/yassert.h> + +namespace NSkiff { + +//////////////////////////////////////////////////////////////////////////////// + +char* TZeroCopyOutputStreamWriter::Current() const +{ + return Current_; +} + +ui64 TZeroCopyOutputStreamWriter::RemainingBytes() const +{ + return RemainingBytes_; +} + +void TZeroCopyOutputStreamWriter::Advance(size_t bytes) +{ + Y_VERIFY(bytes <= RemainingBytes_); + Current_ += bytes; + RemainingBytes_ -= bytes; +} + +void TZeroCopyOutputStreamWriter::Write(const void* buffer, size_t length) +{ + if (length > RemainingBytes_) { + UndoRemaining(); + Output_->Write(buffer, length); + TotalWrittenBlockSize_ += length; + ObtainNextBlock(); + } else { + memcpy(Current_, buffer, length); + Advance(length); + } +} + +ui64 TZeroCopyOutputStreamWriter::GetTotalWrittenSize() const +{ + return TotalWrittenBlockSize_ - RemainingBytes_; +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NSkiff diff --git a/library/cpp/skiff/zerocopy_output_writer.cpp b/library/cpp/skiff/zerocopy_output_writer.cpp new file mode 100644 index 0000000000..49492b55a4 --- /dev/null +++ b/library/cpp/skiff/zerocopy_output_writer.cpp @@ -0,0 +1,38 @@ +#include "zerocopy_output_writer.h" + +namespace NSkiff { + +//////////////////////////////////////////////////////////////////////////////// + +TZeroCopyOutputStreamWriter::TZeroCopyOutputStreamWriter(IZeroCopyOutput* output) + : Output_(output) +{ + ObtainNextBlock(); +} + +TZeroCopyOutputStreamWriter::~TZeroCopyOutputStreamWriter() +{ + if (RemainingBytes_ > 0) { + UndoRemaining(); + } +} + +void TZeroCopyOutputStreamWriter::ObtainNextBlock() +{ + if (RemainingBytes_ > 0) { + UndoRemaining(); + } + RemainingBytes_ = Output_->Next(&Current_); + TotalWrittenBlockSize_ += RemainingBytes_; +} + +void 
TZeroCopyOutputStreamWriter::UndoRemaining() +{ + Output_->Undo(RemainingBytes_); + TotalWrittenBlockSize_ -= RemainingBytes_; + RemainingBytes_ = 0; +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NSkiff diff --git a/library/cpp/skiff/zerocopy_output_writer.h b/library/cpp/skiff/zerocopy_output_writer.h new file mode 100644 index 0000000000..b0bccc5a63 --- /dev/null +++ b/library/cpp/skiff/zerocopy_output_writer.h @@ -0,0 +1,41 @@ +#pragma once + +#include <util/stream/zerocopy_output.h> + +namespace NSkiff { + +//////////////////////////////////////////////////////////////////////////////// + +// Simple wrapper around IZeroCopyOutput that tracks the current write position, the bytes left in the current block, and the total number of bytes written. +class TZeroCopyOutputStreamWriter + : private TNonCopyable +{ +public: + explicit TZeroCopyOutputStreamWriter(IZeroCopyOutput* output); + + ~TZeroCopyOutputStreamWriter(); + + Y_FORCE_INLINE char* Current() const; + Y_FORCE_INLINE ui64 RemainingBytes() const; + Y_FORCE_INLINE void Advance(size_t bytes); + void UndoRemaining(); + Y_FORCE_INLINE void Write(const void* buffer, size_t length); + Y_FORCE_INLINE ui64 GetTotalWrittenSize() const; + +private: + void ObtainNextBlock(); + +private: + IZeroCopyOutput* Output_; + char* Current_ = nullptr; + ui64 RemainingBytes_ = 0; + ui64 TotalWrittenBlockSize_ = 0; +}; + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NSkiff + +#define ZEROCOPY_OUTPUT_WRITER_INL_H_ +#include "zerocopy_output_writer-inl.h" +#undef ZEROCOPY_OUTPUT_WRITER_INL_H_ |