diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/apache/avro/api | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/apache/avro/api')
35 files changed, 7030 insertions, 0 deletions
diff --git a/contrib/libs/apache/avro/api/AvroParse.hh b/contrib/libs/apache/avro/api/AvroParse.hh new file mode 100644 index 0000000000..dd2b98c419 --- /dev/null +++ b/contrib/libs/apache/avro/api/AvroParse.hh @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_AvroParse_hh__ +#define avro_AvroParse_hh__ + +#include "Config.hh" +#include "AvroTraits.hh" +#include "ResolvingReader.hh" + +/// \file +/// +/// Standalone parse functions for Avro types. + +namespace avro { + +/// The main parse entry point function. Takes a parser (either validating or +/// plain) and the object that should receive the parsed data. + +template <typename Reader, typename T> +void parse(Reader &p, T& val) +{ + parse(p, val, is_serializable<T>()); +} + +template <typename T> +void parse(ResolvingReader &p, T& val) +{ + translatingParse(p, val, is_serializable<T>()); +} + +/// Type trait should be set to is_serializable in otherwise force the compiler to complain. + +template <typename Reader, typename T> +void parse(Reader &p, T& val, const std::false_type &) +{ + static_assert(sizeof(T) == 0, "Not a valid type to parse"); +} + +template <typename Reader, typename T> +void translatingParse(Reader &p, T& val, const std::false_type &) +{ + static_assert(sizeof(T) == 0, "Not a valid type to parse"); +} + +// @{ + +/// The remainder of the file includes default implementations for serializable types. + + +template <typename Reader, typename T> +void parse(Reader &p, T &val, const std::true_type &) { + p.readValue(val); +} + +template <typename Reader> +void parse(Reader &p, std::vector<uint8_t> &val, const std::true_type &) { + p.readBytes(val); +} + +template<typename T> +void translatingParse(ResolvingReader &p, T& val, const std::true_type &) { + p.parse(val); +} + +// @} + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/AvroSerialize.hh b/contrib/libs/apache/avro/api/AvroSerialize.hh new file mode 100644 index 0000000000..9495940c9c --- /dev/null +++ b/contrib/libs/apache/avro/api/AvroSerialize.hh @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_AvroSerialize_hh__ +#define avro_AvroSerialize_hh__ + +#include "Config.hh" +#include "AvroTraits.hh" + +/// \file +/// +/// Standalone serialize functions for Avro types. + +namespace avro { + +/// The main serializer entry point function. Takes a serializer (either validating or +/// plain) and the object that should be serialized. + +template <typename Writer, typename T> +void serialize(Writer &s, const T& val) +{ + serialize(s, val, is_serializable<T>()); +} + +/// Type trait should be set to is_serializable in otherwise force the compiler to complain. + +template <typename Writer, typename T> +void serialize(Writer &s, const T& val, const std::false_type &) +{ + static_assert(sizeof(T) == 0, "Not a valid type to serialize"); +} + +/// The remainder of the file includes default implementations for serializable types. + +// @{ + +template <typename Writer, typename T> +void serialize(Writer &s, T val, const std::true_type &) { + s.writeValue(val); +} + +template <typename Writer> +void serialize(Writer &s, const std::vector<uint8_t> &val, const std::true_type &) { + s.writeBytes(val.data(), val.size()); +} + +// @} + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/AvroTraits.hh b/contrib/libs/apache/avro/api/AvroTraits.hh new file mode 100644 index 0000000000..91e2130c76 --- /dev/null +++ b/contrib/libs/apache/avro/api/AvroTraits.hh @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_AvroTraits_hh__ +#define avro_AvroTraits_hh__ + +#include "Config.hh" +#include "Types.hh" +#include <stdint.h> +#include <type_traits> + +/** @file + * + * This header contains type traits and similar utilities used by the library. + */ +namespace avro { + +/** + * Define an is_serializable trait for types we can serialize natively. + * New types will need to define the trait as well. + */ +template <typename T> +struct is_serializable : public std::false_type{}; + +template <typename T> +struct is_promotable : public std::false_type{}; + +template <typename T> +struct type_to_avro { + static const Type type = AVRO_NUM_TYPES; +}; + +/** + * Check if a \p T is a complete type i.e. it is defined as opposed to just + * declared. + * + * is_defined<T>::value will be true or false depending on whether T is a + * complete type or not respectively. + */ +template <class T> +struct is_defined { + + typedef char yes[1]; + + typedef char no[2]; + + template <class U> static yes& test(char(*)[sizeof(U)]) { throw 0; }; + + template <class U> static no& test(...) { throw 0; }; + + static const bool value = sizeof(test<T>(0)) == sizeof(yes); +}; + +/** + * Similar to is_defined, but used to check if T is not defined. + * + * is_not_defined<T>::value will be true or false depending on whether T is an + * incomplete type or not respectively. + */ +template <class T> +struct is_not_defined { + + typedef char yes[1]; + + typedef char no[2]; + + template <class U> static yes& test(char(*)[sizeof(U)]) { throw 0; }; + + template <class U> static no& test(...) { throw 0; }; + + static const bool value = sizeof(test<T>(0)) == sizeof(no); +}; + +#define DEFINE_PRIMITIVE(CTYPE, AVROTYPE) \ +template <> \ +struct is_serializable<CTYPE> : public std::true_type{}; \ +\ +template <> \ +struct type_to_avro<CTYPE> { \ + static const Type type = AVROTYPE; \ +}; + +#define DEFINE_PROMOTABLE_PRIMITIVE(CTYPE, AVROTYPE) \ +template <> \ +struct is_promotable<CTYPE> : public std::true_type{}; \ +\ +DEFINE_PRIMITIVE(CTYPE, AVROTYPE) + +DEFINE_PROMOTABLE_PRIMITIVE(int32_t, AVRO_INT) +DEFINE_PROMOTABLE_PRIMITIVE(int64_t, AVRO_LONG) +DEFINE_PROMOTABLE_PRIMITIVE(float, AVRO_FLOAT) +DEFINE_PRIMITIVE(double, AVRO_DOUBLE) +DEFINE_PRIMITIVE(bool, AVRO_BOOL) +DEFINE_PRIMITIVE(Null, AVRO_NULL) +DEFINE_PRIMITIVE(std::string, AVRO_STRING) +DEFINE_PRIMITIVE(std::vector<uint8_t>, AVRO_BYTES) + + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/Compiler.hh b/contrib/libs/apache/avro/api/Compiler.hh new file mode 100644 index 0000000000..892d60580d --- /dev/null +++ b/contrib/libs/apache/avro/api/Compiler.hh @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Compiler_hh__ +#define avro_Compiler_hh__ + +#include "Config.hh" +#include <stdint.h> +#include <istream> + +namespace avro { + +class AVRO_DECL InputStream; + +/// This class is used to implement an avro spec parser using a flex/bison +/// compiler. In order for the lexer to be reentrant, this class provides a +/// lexer object for each parse. The bison parser also uses this class to +/// build up an avro parse tree as the avro spec is parsed. + +class AVRO_DECL ValidSchema; + +/// Given a stream comtaining a JSON schema, compiles the schema to a +/// ValidSchema object. Throws if the schema cannot be compiled to a valid +/// schema + +AVRO_DECL void compileJsonSchema(std::istream &is, ValidSchema &schema); + +/// Non-throwing version of compileJsonSchema. +/// +/// \return True if no error, false if error (with the error string set) +/// + +AVRO_DECL bool compileJsonSchema(std::istream &is, ValidSchema &schema, + std::string &error); + +AVRO_DECL ValidSchema compileJsonSchemaFromStream(InputStream& is); + +AVRO_DECL ValidSchema compileJsonSchemaFromMemory(const uint8_t* input, size_t len); + +AVRO_DECL ValidSchema compileJsonSchemaFromString(const char* input); + +AVRO_DECL ValidSchema compileJsonSchemaFromString(const std::string& input); + +AVRO_DECL ValidSchema compileJsonSchemaFromFile(const char* filename); + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/Config.hh b/contrib/libs/apache/avro/api/Config.hh new file mode 100644 index 0000000000..69d36f2abe --- /dev/null +++ b/contrib/libs/apache/avro/api/Config.hh @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Config_hh +#define avro_Config_hh + +// Windows DLL suport + +#ifdef _WIN32 +#pragma warning (disable: 4275 4251) + +#if defined(AVRO_DYN_LINK) +#ifdef AVRO_SOURCE +# define AVRO_DECL __declspec(dllexport) +#else +# define AVRO_DECL __declspec(dllimport) +#endif // AVRO_SOURCE +#endif // AVRO_DYN_LINK + +#include <intsafe.h> +typedef SSIZE_T ssize_t; + +#endif // _WIN32 + +#ifndef AVRO_DECL +#define AVRO_DECL +#endif + +#endif + diff --git a/contrib/libs/apache/avro/api/DataFile.hh b/contrib/libs/apache/avro/api/DataFile.hh new file mode 100644 index 0000000000..50169106b1 --- /dev/null +++ b/contrib/libs/apache/avro/api/DataFile.hh @@ -0,0 +1,419 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_DataFile_hh__ +#define avro_DataFile_hh__ + +#include "Config.hh" +#include "Encoder.hh" +#include "buffer/Buffer.hh" +#include "ValidSchema.hh" +#include "Specific.hh" +#include "Stream.hh" + +#include <map> +#include <string> +#include <vector> + +#include "array" +#include "boost/utility.hpp" +#include <boost/iostreams/filtering_stream.hpp> + +namespace avro { + +/** Specify type of compression to use when writing data files. */ +enum Codec { + NULL_CODEC, + DEFLATE_CODEC, + +#ifdef SNAPPY_CODEC_AVAILABLE + SNAPPY_CODEC +#endif + +}; + +const int SyncSize = 16; +/** + * The sync value. + */ +typedef std::array<uint8_t, SyncSize> DataFileSync; + +/** + * Type-independent portion of DataFileWriter. + * At any given point in time, at most one file can be written using + * this object. + */ +class AVRO_DECL DataFileWriterBase : boost::noncopyable { + const std::string filename_; + const ValidSchema schema_; + const EncoderPtr encoderPtr_; + const size_t syncInterval_; + Codec codec_; + + std::unique_ptr<OutputStream> stream_; + std::unique_ptr<OutputStream> buffer_; + const DataFileSync sync_; + int64_t objectCount_; + + typedef std::map<std::string, std::vector<uint8_t> > Metadata; + + Metadata metadata_; + int64_t lastSync_; + + static std::unique_ptr<OutputStream> makeStream(const char* filename); + static DataFileSync makeSync(); + + void writeHeader(); + void setMetadata(const std::string& key, const std::string& value); + + /** + * Generates a sync marker in the file. + */ + void sync(); + + /** + * Shared constructor portion since we aren't using C++11 + */ + void init(const ValidSchema &schema, size_t syncInterval, const Codec &codec); + +public: + /** + * Returns the current encoder for this writer. + */ + Encoder& encoder() const { return *encoderPtr_; } + + /** + * Returns true if the buffer has sufficient data for a sync to be + * inserted. + */ + void syncIfNeeded(); + + /** + * Returns the byte offset (within the current file) of the start of the current block being written. + */ + uint64_t getCurrentBlockStart(); + + /** + * Increments the object count. + */ + void incr() { + ++objectCount_; + } + /** + * Constructs a data file writer with the given sync interval and name. + */ + DataFileWriterBase(const char* filename, const ValidSchema& schema, + size_t syncInterval, Codec codec = NULL_CODEC); + DataFileWriterBase(std::unique_ptr<OutputStream> outputStream, + const ValidSchema& schema, size_t syncInterval, Codec codec); + + ~DataFileWriterBase(); + /** + * Closes the current file. Once closed this datafile object cannot be + * used for writing any more. + */ + void close(); + + /** + * Returns the schema for this data file. + */ + const ValidSchema& schema() const { return schema_; } + + /** + * Flushes any unwritten data into the file. + */ + void flush(); +}; + +/** + * An Avro datafile that can store objects of type T. + */ +template <typename T> +class DataFileWriter : boost::noncopyable { + std::unique_ptr<DataFileWriterBase> base_; +public: + /** + * Constructs a new data file. + */ + DataFileWriter(const char* filename, const ValidSchema& schema, + size_t syncInterval = 16 * 1024, Codec codec = NULL_CODEC) : + base_(new DataFileWriterBase(filename, schema, syncInterval, codec)) { } + + DataFileWriter(std::unique_ptr<OutputStream> outputStream, const ValidSchema& schema, + size_t syncInterval = 16 * 1024, Codec codec = NULL_CODEC) : + base_(new DataFileWriterBase(std::move(outputStream), schema, syncInterval, codec)) { } + + /** + * Writes the given piece of data into the file. + */ + void write(const T& datum) { + base_->syncIfNeeded(); + avro::encode(base_->encoder(), datum); + base_->incr(); + } + + /** + * Returns the byte offset (within the current file) of the start of the current block being written. + */ + uint64_t getCurrentBlockStart() { return base_->getCurrentBlockStart(); } + + + /** + * Closes the current file. Once closed this datafile object cannot be + * used for writing any more. + */ + void close() { base_->close(); } + + /** + * Returns the schema for this data file. + */ + const ValidSchema& schema() const { return base_->schema(); } + + /** + * Flushes any unwritten data into the file. + */ + void flush() { base_->flush(); } +}; + +/** + * The type independent portion of reader. + */ +class AVRO_DECL DataFileReaderBase : boost::noncopyable { + const std::string filename_; + const std::unique_ptr<InputStream> stream_; + const DecoderPtr decoder_; + int64_t objectCount_; + bool eof_; + Codec codec_; + int64_t blockStart_; + int64_t blockEnd_; + + ValidSchema readerSchema_; + ValidSchema dataSchema_; + DecoderPtr dataDecoder_; + std::unique_ptr<InputStream> dataStream_; + typedef std::map<std::string, std::vector<uint8_t> > Metadata; + + Metadata metadata_; + DataFileSync sync_; + + // for compressed buffer + std::unique_ptr<boost::iostreams::filtering_istream> os_; + std::vector<char> compressed_; + std::string uncompressed; + void readHeader(); + + void readDataBlock(); + void doSeek(int64_t position); +public: + /** + * Returns the current decoder for this reader. + */ + Decoder& decoder() { return *dataDecoder_; } + + /** + * Returns true if and only if there is more to read. + */ + bool hasMore(); + + /** + * Decrements the number of objects yet to read. + */ + void decr() { --objectCount_; } + + /** + * Constructs the reader for the given file and the reader is + * expected to use the schema that is used with data. + * This function should be called exactly once after constructing + * the DataFileReaderBase object. + */ + DataFileReaderBase(const char* filename); + + DataFileReaderBase(std::unique_ptr<InputStream> inputStream); + + /** + * Initializes the reader so that the reader and writer schemas + * are the same. + */ + void init(); + + /** + * Initializes the reader to read objects according to the given + * schema. This gives an opportunity for the reader to see the schema + * in the data file before deciding the right schema to use for reading. + * This must be called exactly once after constructing the + * DataFileReaderBase object. + */ + void init(const ValidSchema& readerSchema); + + /** + * Returns the schema for this object. + */ + const ValidSchema& readerSchema() { return readerSchema_; } + + /** + * Returns the schema stored with the data file. + */ + const ValidSchema& dataSchema() { return dataSchema_; } + + /** + * Closes the reader. No further operation is possible on this reader. + */ + void close(); + + /** + * Move to a specific, known synchronization point, for example one returned + * from tell() after sync(). + */ + void seek(int64_t position); + + /** + * Move to the next synchronization point after a position. To process a + * range of file entries, call this with the starting position, then check + * pastSync() with the end point before each use of decoder(). + */ + void sync(int64_t position); + + /** + * Return true if past the next synchronization point after a position. + */ + bool pastSync(int64_t position); + + /** + * Return the last synchronization point before our current position. + */ + int64_t previousSync(); +}; + +/** + * Reads the contents of data file one after another. + */ +template <typename T> +class DataFileReader : boost::noncopyable { + std::unique_ptr<DataFileReaderBase> base_; +public: + /** + * Constructs the reader for the given file and the reader is + * expected to use the given schema. + */ + DataFileReader(const char* filename, const ValidSchema& readerSchema) : + base_(new DataFileReaderBase(filename)) { + base_->init(readerSchema); + } + + DataFileReader(std::unique_ptr<InputStream> inputStream, const ValidSchema& readerSchema) : + base_(new DataFileReaderBase(std::move(inputStream))) { + base_->init(readerSchema); + } + + /** + * Constructs the reader for the given file and the reader is + * expected to use the schema that is used with data. + */ + DataFileReader(const char* filename) : + base_(new DataFileReaderBase(filename)) { + base_->init(); + } + + DataFileReader(std::unique_ptr<InputStream> inputStream) : + base_(new DataFileReaderBase(std::move(inputStream))) { + base_->init(); + } + + /** + * Constructs a reader using the reader base. This form of constructor + * allows the user to examine the schema of a given file and then + * decide to use the right type of data to be deserialize. Without this + * the user must know the type of data for the template _before_ + * he knows the schema within the file. + * The schema present in the data file will be used for reading + * from this reader. + */ + DataFileReader(std::unique_ptr<DataFileReaderBase> base) : base_(std::move(base)) { + base_->init(); + } + + /** + * Constructs a reader using the reader base. This form of constructor + * allows the user to examine the schema of a given file and then + * decide to use the right type of data to be deserialize. Without this + * the user must know the type of data for the template _before_ + * he knows the schema within the file. + * The argument readerSchema will be used for reading + * from this reader. + */ + DataFileReader(std::unique_ptr<DataFileReaderBase> base, + const ValidSchema& readerSchema) : base_(std::move(base)) { + base_->init(readerSchema); + } + + /** + * Reads the next entry from the data file. + * \return true if an object has been successfully read into \p datum and + * false if there are no more entries in the file. + */ + bool read(T& datum) { + if (base_->hasMore()) { + base_->decr(); + avro::decode(base_->decoder(), datum); + return true; + } + return false; + } + + /** + * Returns the schema for this object. + */ + const ValidSchema& readerSchema() { return base_->readerSchema(); } + + /** + * Returns the schema stored with the data file. + */ + const ValidSchema& dataSchema() { return base_->dataSchema(); } + + /** + * Closes the reader. No further operation is possible on this reader. + */ + void close() { return base_->close(); } + + /** + * Move to a specific, known synchronization point, for example one returned + * from previousSync(). + */ + void seek(int64_t position) { base_->seek(position); } + + /** + * Move to the next synchronization point after a position. To process a + * range of file entries, call this with the starting position, then check + * pastSync() with the end point before each call to read(). + */ + void sync(int64_t position) { base_->sync(position); } + + /** + * Return true if past the next synchronization point after a position. + */ + bool pastSync(int64_t position) { return base_->pastSync(position); } + + /** + * Return the last synchronization point before our current position. + */ + int64_t previousSync() { return base_->previousSync(); } +}; + +} // namespace avro +#endif diff --git a/contrib/libs/apache/avro/api/Decoder.hh b/contrib/libs/apache/avro/api/Decoder.hh new file mode 100644 index 0000000000..5356d79f32 --- /dev/null +++ b/contrib/libs/apache/avro/api/Decoder.hh @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Decoder_hh__ +#define avro_Decoder_hh__ + +#include "Config.hh" +#include <stdint.h> +#include <string> +#include <vector> +#include <memory> + +#include "ValidSchema.hh" +#include "Stream.hh" + +/// \file +/// +/// Low level support for decoding avro values. +/// This class has two types of functions. One type of functions support +/// decoding of leaf values (for example, decodeLong and +/// decodeString). These functions have analogs in Encoder. +/// +/// The other type of functions support decoding of maps and arrays. +/// These functions are arrayStart, startItem, and arrayEnd +/// (and similar functions for maps). + +namespace avro { + +/** + * Decoder is an interface implemented by every decoder capable + * of decoding Avro data. + */ +class AVRO_DECL Decoder { +public: + virtual ~Decoder() { }; + /// All future decoding will come from is, which should be valid + /// until replaced by another call to init() or this Decoder is + /// destructed. + virtual void init(InputStream& is) = 0; + + /// Decodes a null from the current stream. + virtual void decodeNull() = 0; + + /// Decodes a bool from the current stream + virtual bool decodeBool() = 0; + + /// Decodes a 32-bit int from the current stream. + virtual int32_t decodeInt() = 0; + + /// Decodes a 64-bit signed int from the current stream. + virtual int64_t decodeLong() = 0; + + /// Decodes a single-precision floating point number from current stream. + virtual float decodeFloat() = 0; + + /// Decodes a double-precision floating point number from current stream. + virtual double decodeDouble() = 0; + + /// Decodes a UTF-8 string from the current stream. + std::string decodeString() { + std::string result; + decodeString(result); + return result; + } + + /** + * Decodes a UTF-8 string from the stream and assigns it to value. + */ + virtual void decodeString(std::string& value) = 0; + + /// Skips a string on the current stream. + virtual void skipString() = 0; + + /// Decodes arbitray binary data from the current stream. + std::vector<uint8_t> decodeBytes() { + std::vector<uint8_t> result; + decodeBytes(result); + return result; + } + + /// Decodes arbitrary binary data from the current stream and puts it + /// in value. + virtual void decodeBytes(std::vector<uint8_t>& value) = 0; + + /// Skips bytes on the current stream. + virtual void skipBytes() = 0; + + /** + * Decodes fixed length binary from the current stream. + * \param[in] n The size (byte count) of the fixed being read. + * \return The fixed data that has been read. The size of the returned + * vector is guaranteed to be equal to \p n. + */ + std::vector<uint8_t> decodeFixed(size_t n) { + std::vector<uint8_t> result; + decodeFixed(n, result); + return result; + } + + /** + * Decodes a fixed from the current stream. + * \param[in] n The size (byte count) of the fixed being read. + * \param[out] value The value that receives the fixed. The vector will + * be size-adjusted based on the fixed's size. + */ + virtual void decodeFixed(size_t n, std::vector<uint8_t>& value) = 0; + + /// Skips fixed length binary on the current stream. + virtual void skipFixed(size_t n) = 0; + + /// Decodes enum from the current stream. + virtual size_t decodeEnum() = 0; + + /// Start decoding an array. Returns the number of entries in first chunk. + virtual size_t arrayStart() = 0; + + /// Returns the number of entries in next chunk. 0 if last. + virtual size_t arrayNext() = 0; + + /// Tries to skip an array. If it can, it returns 0. Otherwise + /// it returns the number of elements to be skipped. The client + /// should skip the individual items. In such cases, skipArray + /// is identical to arrayStart. + virtual size_t skipArray() = 0; + + /// Start decoding a map. Returns the number of entries in first chunk. + virtual size_t mapStart() = 0; + + /// Returns the number of entries in next chunk. 0 if last. + virtual size_t mapNext() = 0; + + /// Tries to skip a map. If it can, it returns 0. Otherwise + /// it returns the number of elements to be skipped. The client + /// should skip the individual items. In such cases, skipMap + /// is identical to mapStart. + virtual size_t skipMap() = 0; + + /// Decodes a branch of a union. The actual value is to follow. + virtual size_t decodeUnionIndex() = 0; + + /// Drains any additional data at the end of the current entry in a stream. + /// It also returns any unused bytes back to any underlying input stream. + /// One situation this happens is when the reader's schema and + /// the writer's schema are records but are different and the writer's + /// record has more fields at the end of the record. + /// Leaving such data unread is usually not a problem. If multiple + /// records are stored consecutively in a stream (e.g. Avro data file) + /// any attempt to read the next record will automatically skip + /// those extra fields of the current record. It would still leave + /// the extra fields at the end of the last record in the stream. + /// This would mean that the stream is not in a good state. For example, + /// if some non-avro information is stored at the end of the stream, + /// the consumers of such data would see the bytes left behind + /// by the avro decoder. Similar set of problems occur if the Decoder + /// consumes more than what it should. + virtual void drain() = 0; +}; + +/** + * Shared pointer to Decoder. + */ +typedef std::shared_ptr<Decoder> DecoderPtr; + +/** + * ResolvingDecoder is derived from \ref Decoder, with an additional + * function to obtain the field ordering of fields within a record. + */ +class AVRO_DECL ResolvingDecoder : public Decoder { +public: + /// Returns the order of fields for records. + /// The order of fields could be different from the order of their + /// order in the schema because the writer's field order could + /// be different. In order to avoid buffering and later use, + /// we return the values in the writer's field order. + virtual const std::vector<size_t>& fieldOrder() = 0; +}; + +/** + * Shared pointer to ResolvingDecoder. + */ +typedef std::shared_ptr<ResolvingDecoder> ResolvingDecoderPtr; +/** + * Returns an decoder that can decode binary Avro standard. + */ +AVRO_DECL DecoderPtr binaryDecoder(); + +/** + * Returns an decoder that validates sequence of calls to an underlying + * Decoder against the given schema. + */ +AVRO_DECL DecoderPtr validatingDecoder(const ValidSchema& schema, + const DecoderPtr& base); + +/** + * Returns an decoder that can decode Avro standard for JSON. + */ +AVRO_DECL DecoderPtr jsonDecoder(const ValidSchema& schema); + +/** + * Returns a decoder that decodes avro data from base written according to + * writerSchema and resolves against readerSchema. + * The client uses the decoder as if the data were written using readerSchema. + * // FIXME: Handle out of order fields. + */ +AVRO_DECL ResolvingDecoderPtr resolvingDecoder(const ValidSchema& writer, + const ValidSchema& reader, const DecoderPtr& base); + + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/Encoder.hh b/contrib/libs/apache/avro/api/Encoder.hh new file mode 100644 index 0000000000..7849e93458 --- /dev/null +++ b/contrib/libs/apache/avro/api/Encoder.hh @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Encoder_hh__ +#define avro_Encoder_hh__ + +#include "Config.hh" +#include <stdint.h> +#include <string> +#include <vector> +#include <memory> + +#include "ValidSchema.hh" +#include "Stream.hh" + +/// \file +/// +/// Low level support for encoding avro values. +/// This class has two types of funtions. One type of functions support +/// the writing of leaf values (for example, encodeLong and +/// encodeString). These functions have analogs in Decoder. +/// +/// The other type of functions support the writing of maps and arrays. +/// These functions are arrayStart, startItem, and arrayEnd +/// (and similar functions for maps). +/// Some implementations of Encoder handle the +/// buffering required to break large maps and arrays into blocks, +/// which is necessary for applications that want to do streaming. + +namespace avro { + +/** + * The abstract base class for all Avro encoders. The implementations + * differ in the method of encoding (binary vresus JSON) or in capabilities + * such as ability to verify the order of invocation of different functions. + */ +class AVRO_DECL Encoder { +public: + virtual ~Encoder() { }; + /// All future encodings will go to os, which should be valid until + /// it is reset with another call to init() or the encoder is + /// destructed. + virtual void init(OutputStream& os) = 0; + + /// Flushes any data in internal buffers. + virtual void flush() = 0; + + /// Returns the number of bytes produced so far. + /// For a meaningful value, do a flush() before invoking this function. + virtual int64_t byteCount() const = 0; + + /// Encodes a null to the current stream. + virtual void encodeNull() = 0; + + /// Encodes a bool to the current stream + virtual void encodeBool(bool b) = 0; + + /// Encodes a 32-bit int to the current stream. + virtual void encodeInt(int32_t i) = 0; + + /// Encodes a 64-bit signed int to the current stream. + virtual void encodeLong(int64_t l) = 0; + + /// Encodes a single-precision floating point number to the current stream. + virtual void encodeFloat(float f) = 0; + + /// Encodes a double-precision floating point number to the current stream. + virtual void encodeDouble(double d) = 0; + + /// Encodes a UTF-8 string to the current stream. + virtual void encodeString(const std::string& s) = 0; + + /** + * Encodes aribtray binary data into tthe current stream as Avro "bytes" + * data type. + * \param bytes Where the data is + * \param len Number of bytes at \p bytes. + */ + virtual void encodeBytes(const uint8_t *bytes, size_t len) = 0; + + /** + * Encodes aribtray binary data into tthe current stream as Avro "bytes" + * data type. + * \param bytes The data. + */ + void encodeBytes(const std::vector<uint8_t>& bytes) { + uint8_t b = 0; + encodeBytes(bytes.empty() ? &b : bytes.data(), bytes.size()); + } + + /// Encodes fixed length binary to the current stream. + virtual void encodeFixed(const uint8_t *bytes, size_t len) = 0; + + /** + * Encodes an Avro data type Fixed. + * \param bytes The fixed, the length of which is taken as the size + * of fixed. + */ + void encodeFixed(const std::vector<uint8_t>& bytes) { + encodeFixed(bytes.data(), bytes.size()); + } + + /// Encodes enum to the current stream. + virtual void encodeEnum(size_t e) = 0; + + /// Indicates that an array of items is being encoded. + virtual void arrayStart() = 0; + + /// Indicates that the current array of items have ended. + virtual void arrayEnd() = 0; + + /// Indicates that a map of items is being encoded. + virtual void mapStart() = 0; + + /// Indicates that the current map of items have ended. + virtual void mapEnd() = 0; + + /// Indicates that count number of items are to follow in the current array + /// or map. + virtual void setItemCount(size_t count) = 0; + + /// Marks a beginning of an item in the current array or map. + virtual void startItem() = 0; + + /// Encodes a branch of a union. The actual value is to follow. + virtual void encodeUnionIndex(size_t e) = 0; +}; + +/** + * Shared pointer to Encoder. + */ +typedef std::shared_ptr<Encoder> EncoderPtr; + +/** + * Returns an encoder that can encode binary Avro standard. + */ +AVRO_DECL EncoderPtr binaryEncoder(); + +/** + * Returns an encoder that validates sequence of calls to an underlying + * Encoder against the given schema. + */ +AVRO_DECL EncoderPtr validatingEncoder(const ValidSchema& schema, + const EncoderPtr& base); + +/** + * Returns an encoder that encodes Avro standard for JSON. + */ +AVRO_DECL EncoderPtr jsonEncoder(const ValidSchema& schema); + +/** + * Returns an encoder that encodes Avro standard for pretty printed JSON. + */ +AVRO_DECL EncoderPtr jsonPrettyEncoder(const ValidSchema& schema); + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/Exception.hh b/contrib/libs/apache/avro/api/Exception.hh new file mode 100644 index 0000000000..7c5410f96b --- /dev/null +++ b/contrib/libs/apache/avro/api/Exception.hh @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Exception_hh__ +#define avro_Exception_hh__ + +#include "Config.hh" +#include <stdexcept> +#include <boost/format.hpp> + +namespace avro { + +/// Wrapper for std::runtime_error that provides convenience constructor +/// for boost::format objects + +class AVRO_DECL Exception : public virtual std::runtime_error +{ + public: + + Exception(const std::string &msg) : + std::runtime_error(msg) + { } + + Exception(const boost::format &msg) : + std::runtime_error( boost::str(msg)) + { } +}; + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/Generic.hh b/contrib/libs/apache/avro/api/Generic.hh new file mode 100644 index 0000000000..e1b3a8290c --- /dev/null +++ b/contrib/libs/apache/avro/api/Generic.hh @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Generic_hh__ +#define avro_Generic_hh__ + +#include <boost/utility.hpp> + +#include "Config.hh" +#include "Types.hh" +#include "Encoder.hh" +#include "Decoder.hh" +#include "GenericDatum.hh" + +namespace avro { +/** + * A utility class to read generic datum from decoders. + */ +class AVRO_DECL GenericReader : boost::noncopyable { + const ValidSchema schema_; + const bool isResolving_; + const DecoderPtr decoder_; + + static void read(GenericDatum& datum, Decoder& d, bool isResolving); +public: + /** + * Constructs a reader for the given schema using the given decoder. + */ + GenericReader(const ValidSchema& s, const DecoderPtr& decoder); + + /** + * Constructs a reader for the given reader's schema \c readerSchema + * using the given + * decoder which holds data matching writer's schema \c writerSchema. + */ + GenericReader(const ValidSchema& writerSchema, + const ValidSchema& readerSchema, const DecoderPtr& decoder); + + /** + * Reads a value off the decoder. + */ + void read(GenericDatum& datum) const; + + /** + * Drains any residual bytes in the input stream (e.g. because + * reader's schema has no use of them) and return unused bytes + * back to the underlying input stream. + */ + void drain() { + decoder_->drain(); + } + /** + * Reads a generic datum from the stream, using the given schema. + */ + static void read(Decoder& d, GenericDatum& g); + + /** + * Reads a generic datum from the stream, using the given schema. + */ + static void read(Decoder& d, GenericDatum& g, const ValidSchema& s); +}; + + +/** + * A utility class to write generic datum to encoders. + */ +class AVRO_DECL GenericWriter : boost::noncopyable { + const ValidSchema schema_; + const EncoderPtr encoder_; + + static void write(const GenericDatum& datum, Encoder& e); +public: + /** + * Constructs a writer for the given schema using the given encoder. + */ + GenericWriter(const ValidSchema& s, const EncoderPtr& encoder); + + /** + * Writes a value onto the encoder. + */ + void write(const GenericDatum& datum) const; + + /** + * Writes a generic datum on to the stream. + */ + static void write(Encoder& e, const GenericDatum& g); + + /** + * Writes a generic datum on to the stream, using the given schema. + * Retained for backward compatibility. + */ + static void write(Encoder& e, const GenericDatum& g, const ValidSchema&) { + write(e, g); + } +}; + +template <typename T> struct codec_traits; + +/** + * Specialization of codec_traits for Generic datum along with its schema. + * This is maintained for compatibility with old code. Please use the + * cleaner codec_traits<GenericDatum> instead. + */ +template <> struct codec_traits<std::pair<ValidSchema, GenericDatum> > { + /** Encodes */ + static void encode(Encoder& e, + const std::pair<ValidSchema, GenericDatum>& p) { + GenericWriter::write(e, p.second, p.first); + } + + /** Decodes */ + static void decode(Decoder& d, std::pair<ValidSchema, GenericDatum>& p) { + GenericReader::read(d, p.second, p.first); + } +}; + +/** + * Specialization of codec_traits for GenericDatum. + */ +template <> struct codec_traits<GenericDatum> { + /** Encodes */ + static void encode(Encoder& e, const GenericDatum& g) { + GenericWriter::write(e, g); + } + + /** Decodes */ + static void decode(Decoder& d, GenericDatum& g) { + GenericReader::read(d, g); + } +}; + +} // namespace avro +#endif + diff --git a/contrib/libs/apache/avro/api/GenericDatum.hh b/contrib/libs/apache/avro/api/GenericDatum.hh new file mode 100644 index 0000000000..ac0e5e5e20 --- /dev/null +++ b/contrib/libs/apache/avro/api/GenericDatum.hh @@ -0,0 +1,576 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_GenericDatum_hh__ +#define avro_GenericDatum_hh__ + +#include <stdint.h> +#include <vector> +#include <map> +#include <string> + +#if __cplusplus >= 201703L +#include <any> +#else +#include "boost/any.hpp" +#endif + +#include "LogicalType.hh" +#include "Node.hh" +#include "ValidSchema.hh" + +namespace avro { + +/** + * Generic datum which can hold any Avro type. The datum has a type + * and a value. The type is one of the Avro data types. The C++ type for + * value corresponds to the Avro type. + * \li An Avro <tt>null</tt> corresponds to no C++ type. It is illegal to + * to try to access values for <tt>null</tt>. + * \li Avro <tt>boolean</tt> maps to C++ <tt>bool</tt> + * \li Avro <tt>int</tt> maps to C++ <tt>int32_t</tt>. + * \li Avro <tt>long</tt> maps to C++ <tt>int64_t</tt>. + * \li Avro <tt>float</tt> maps to C++ <tt>float</tt>. + * \li Avro <tt>double</tt> maps to C++ <tt>double</tt>. + * \li Avro <tt>string</tt> maps to C++ <tt>std::string</tt>. + * \li Avro <tt>bytes</tt> maps to C++ <tt>std::vector<uint_t></tt>. + * \li Avro <tt>fixed</tt> maps to C++ class <tt>GenericFixed</tt>. + * \li Avro <tt>enum</tt> maps to C++ class <tt>GenericEnum</tt>. + * \li Avro <tt>array</tt> maps to C++ class <tt>GenericArray</tt>. + * \li Avro <tt>map</tt> maps to C++ class <tt>GenericMap</tt>. + * \li There is no C++ type corresponding to Avro <tt>union</tt>. The + * object should have the C++ type corresponing to one of the constituent + * types of the union. + * + */ +class AVRO_DECL GenericDatum { +protected: + Type type_; + LogicalType logicalType_; +#if __cplusplus >= 201703L + std::any value_; +#else + boost::any value_; +#endif + + GenericDatum(Type t) + : type_(t), logicalType_(LogicalType::NONE) { } + + GenericDatum(Type t, LogicalType logicalType) + : type_(t), logicalType_(logicalType) { } + + template <typename T> + GenericDatum(Type t, LogicalType logicalType, const T& v) + : type_(t), logicalType_(logicalType), value_(v) { } + + void init(const NodePtr& schema); +public: + /** + * The avro data type this datum holds. + */ + Type type() const; + + /** + * The avro logical type that augments the main data type this datum holds. + */ + LogicalType logicalType() const; + + /** + * Returns the value held by this datum. + * T The type for the value. This must correspond to the + * avro type returned by type(). + */ + template<typename T> const T& value() const; + + /** + * Returns the reference to the value held by this datum, which + * can be used to change the contents. Please note that only + * value can be changed, the data type of the value held cannot + * be changed. + * + * T The type for the value. This must correspond to the + * avro type returned by type(). + */ + template<typename T> T& value(); + + /** + * Returns true if and only if this datum is a union. + */ + bool isUnion() const { return type_ == AVRO_UNION; } + + /** + * Returns the index of the current branch, if this is a union. + * \sa isUnion(). + */ + size_t unionBranch() const; + + /** + * Selects a new branch in the union if this is a union. + * \sa isUnion(). + */ + void selectBranch(size_t branch); + + /// Makes a new AVRO_NULL datum. + GenericDatum() : type_(AVRO_NULL), logicalType_(LogicalType::NONE) { } + + /// Makes a new AVRO_BOOL datum whose value is of type bool. + GenericDatum(bool v) + : type_(AVRO_BOOL), logicalType_(LogicalType::NONE), value_(v) { } + + /// Makes a new AVRO_INT datum whose value is of type int32_t. + GenericDatum(int32_t v) + : type_(AVRO_INT), logicalType_(LogicalType::NONE), value_(v) { } + + /// Makes a new AVRO_LONG datum whose value is of type int64_t. + GenericDatum(int64_t v) + : type_(AVRO_LONG), logicalType_(LogicalType::NONE), value_(v) { } + + /// Makes a new AVRO_FLOAT datum whose value is of type float. + GenericDatum(float v) + : type_(AVRO_FLOAT), logicalType_(LogicalType::NONE), value_(v) { } + + /// Makes a new AVRO_DOUBLE datum whose value is of type double. + GenericDatum(double v) + : type_(AVRO_DOUBLE), logicalType_(LogicalType::NONE), value_(v) { } + + /// Makes a new AVRO_STRING datum whose value is of type std::string. + GenericDatum(const std::string& v) + : type_(AVRO_STRING), logicalType_(LogicalType::NONE), value_(v) { } + + /// Makes a new AVRO_BYTES datum whose value is of type + /// std::vector<uint8_t>. + GenericDatum(const std::vector<uint8_t>& v) : + type_(AVRO_BYTES), logicalType_(LogicalType::NONE), value_(v) { } + + /** + * Constructs a datum corresponding to the given avro type. + * The value will the appropriate default corresponding to the + * data type. + * \param schema The schema that defines the avro type. + */ + GenericDatum(const NodePtr& schema); + + /** + * Constructs a datum corresponding to the given avro type and set + * the value. + * \param schema The schema that defines the avro type. + * \param v The value for this type. + */ + template<typename T> + GenericDatum(const NodePtr& schema, const T& v) : + type_(schema->type()), logicalType_(schema->logicalType()) { + init(schema); +#if __cplusplus >= 201703L + *std::any_cast<T>(&value_) = v; +#else + *boost::any_cast<T>(&value_) = v; +#endif + } + + /** + * Constructs a datum corresponding to the given avro type. + * The value will the appropriate default corresponding to the + * data type. + * \param schema The schema that defines the avro type. + */ + GenericDatum(const ValidSchema& schema); +}; + +/** + * The base class for all generic type for containers. + */ +class AVRO_DECL GenericContainer { + NodePtr schema_; + static void assertType(const NodePtr& schema, Type type); +protected: + /** + * Constructs a container corresponding to the given schema. + */ + GenericContainer(Type type, const NodePtr& s) : schema_(s) { + assertType(s, type); + } + +public: + /// Returns the schema for this object + const NodePtr& schema() const { + return schema_; + } +}; + +/** + * Generic container for unions. + */ +class AVRO_DECL GenericUnion : public GenericContainer { + size_t curBranch_; + GenericDatum datum_; + +public: + /** + * Constructs a generic union corresponding to the given schema \p schema, + * and the given value. The schema should be of Avro type union + * and the value should correspond to one of the branches of the union. + */ + GenericUnion(const NodePtr& schema) : + GenericContainer(AVRO_UNION, schema), curBranch_(schema->leaves()) { + selectBranch(0); + } + + /** + * Returns the index of the current branch. + */ + size_t currentBranch() const { return curBranch_; } + + /** + * Selects a new branch. The type for the value is changed accordingly. + * \param branch The index for the selected branch. + */ + void selectBranch(size_t branch) { + if (curBranch_ != branch) { + datum_ = GenericDatum(schema()->leafAt(branch)); + curBranch_ = branch; + } + } + + /** + * Returns the datum corresponding to the currently selected branch + * in this union. + */ + GenericDatum& datum() { + return datum_; + } + + /** + * Returns the datum corresponding to the currently selected branch + * in this union. + */ + const GenericDatum& datum() const { + return datum_; + } +}; + +/** + * The generic container for Avro records. + */ +class AVRO_DECL GenericRecord : public GenericContainer { + std::vector<GenericDatum> fields_; +public: + /** + * Constructs a generic record corresponding to the given schema \p schema, + * which should be of Avro type record. + */ + GenericRecord(const NodePtr& schema); + + /** + * Returns the number of fields in the current record. + */ + size_t fieldCount() const { + return fields_.size(); + } + + /** + * Returns index of the field with the given name \p name + */ + size_t fieldIndex(const std::string& name) const { + size_t index = 0; + if (!schema()->nameIndex(name, index)) { + throw Exception("Invalid field name: " + name); + } + return index; + } + + /** + * Returns true if a field with the given name \p name is located in this r + * false otherwise + */ + bool hasField(const std::string& name) const { + size_t index = 0; + return schema()->nameIndex(name, index); + } + + /** + * Returns the field with the given name \p name. + */ + const GenericDatum& field(const std::string& name) const { + return fieldAt(fieldIndex(name)); + } + + /** + * Returns the reference to the field with the given name \p name, + * which can be used to change the contents. + */ + GenericDatum& field(const std::string& name) { + return fieldAt(fieldIndex(name)); + } + + /** + * Returns the field at the given position \p pos. + */ + const GenericDatum& fieldAt(size_t pos) const { + return fields_[pos]; + } + + /** + * Returns the reference to the field at the given position \p pos, + * which can be used to change the contents. + */ + GenericDatum& fieldAt(size_t pos) { + return fields_[pos]; + } + + /** + * Replaces the field at the given position \p pos with \p v. + */ + void setFieldAt(size_t pos, const GenericDatum& v) { + // assertSameType(v, schema()->leafAt(pos)); + fields_[pos] = v; + } +}; + +/** + * The generic container for Avro arrays. + */ +class AVRO_DECL GenericArray : public GenericContainer { +public: + /** + * The contents type for the array. + */ + typedef std::vector<GenericDatum> Value; + + /** + * Constructs a generic array corresponding to the given schema \p schema, + * which should be of Avro type array. + */ + GenericArray(const NodePtr& schema) : GenericContainer(AVRO_ARRAY, schema) { + } + + /** + * Returns the contents of this array. + */ + const Value& value() const { + return value_; + } + + /** + * Returns the reference to the contents of this array. + */ + Value& value() { + return value_; + } +private: + Value value_; +}; + +/** + * The generic container for Avro maps. + */ +class AVRO_DECL GenericMap : public GenericContainer { +public: + /** + * The contents type for the map. + */ + typedef std::vector<std::pair<std::string, GenericDatum> > Value; + + /** + * Constructs a generic map corresponding to the given schema \p schema, + * which should be of Avro type map. + */ + GenericMap(const NodePtr& schema) : GenericContainer(AVRO_MAP, schema) { + } + + /** + * Returns the contents of this map. + */ + const Value& value() const { + return value_; + } + + /** + * Returns the reference to the contents of this map. + */ + Value& value() { + return value_; + } +private: + Value value_; +}; + +/** + * Generic container for Avro enum. + */ +class AVRO_DECL GenericEnum : public GenericContainer { + size_t value_; + + static size_t index(const NodePtr& schema, const std::string& symbol) { + size_t result; + if (schema->nameIndex(symbol, result)) { + return result; + } + throw Exception("No such symbol"); + } + +public: + /** + * Constructs a generic enum corresponding to the given schema \p schema, + * which should be of Avro type enum. + */ + GenericEnum(const NodePtr& schema) : + GenericContainer(AVRO_ENUM, schema), value_(0) { + } + + GenericEnum(const NodePtr& schema, const std::string& symbol) : + GenericContainer(AVRO_ENUM, schema), value_(index(schema, symbol)) { + } + + /** + * Returns the symbol corresponding to the cardinal \p n. If the + * value for \p n is not within the limits an exception is thrown. + */ + const std::string& symbol(size_t n) { + if (n < schema()->names()) { + return schema()->nameAt(n); + } + throw Exception("Not as many symbols"); + } + + /** + * Returns the cardinal for the given symbol \c symbol. If the symbol + * is not defined for this enum and exception is thrown. + */ + size_t index(const std::string& symbol) const { + return index(schema(), symbol); + } + + /** + * Set the value for this enum corresponding to the given symbol \c symbol. + */ + size_t set(const std::string& symbol) { + return value_ = index(symbol); + } + + /** + * Set the value for this enum corresponding to the given cardinal \c n. + */ + void set(size_t n) { + if (n < schema()->names()) { + value_ = n; + return; + } + throw Exception("Not as many symbols"); + } + + /** + * Returns the cardinal for the current value of this enum. + */ + size_t value() const { + return value_; + } + + /** + * Returns the symbol for the current value of this enum. + */ + const std::string& symbol() const { + return schema()->nameAt(value_); + } +}; + +/** + * Generic container for Avro fixed. + */ +class AVRO_DECL GenericFixed : public GenericContainer { + std::vector<uint8_t> value_; +public: + /** + * Constructs a generic enum corresponding to the given schema \p schema, + * which should be of Avro type fixed. + */ + GenericFixed(const NodePtr& schema) : GenericContainer(AVRO_FIXED, schema) { + value_.resize(schema->fixedSize()); + } + + GenericFixed(const NodePtr& schema, const std::vector<uint8_t>& v) : + GenericContainer(AVRO_FIXED, schema), value_(v) { } + + /** + * Returns the contents of this fixed. + */ + const std::vector<uint8_t>& value() const { + return value_; + } + + /** + * Returns the reference to the contents of this fixed. + */ + std::vector<uint8_t>& value() { + return value_; + } +}; + +inline Type GenericDatum::type() const { + return (type_ == AVRO_UNION) ? +#if __cplusplus >= 201703L + std::any_cast<GenericUnion>(&value_)->datum().type() : +#else + boost::any_cast<GenericUnion>(&value_)->datum().type() : +#endif + type_; +} + +inline LogicalType GenericDatum::logicalType() const { + return logicalType_; +} + +template<typename T> T& GenericDatum::value() { + return (type_ == AVRO_UNION) ? +#if __cplusplus >= 201703L + std::any_cast<GenericUnion>(&value_)->datum().value<T>() : + *std::any_cast<T>(&value_); +#else + boost::any_cast<GenericUnion>(&value_)->datum().value<T>() : + *boost::any_cast<T>(&value_); +#endif +} + +template<typename T> const T& GenericDatum::value() const { + return (type_ == AVRO_UNION) ? +#if __cplusplus >= 201703L + std::any_cast<GenericUnion>(&value_)->datum().value<T>() : + *std::any_cast<T>(&value_); +#else + boost::any_cast<GenericUnion>(&value_)->datum().value<T>() : + *boost::any_cast<T>(&value_); +#endif +} + +inline size_t GenericDatum::unionBranch() const { +#if __cplusplus >= 201703L + return std::any_cast<GenericUnion>(&value_)->currentBranch(); +#else + return boost::any_cast<GenericUnion>(&value_)->currentBranch(); +#endif +} + +inline void GenericDatum::selectBranch(size_t branch) { +#if __cplusplus >= 201703L + std::any_cast<GenericUnion>(&value_)->selectBranch(branch); +#else + boost::any_cast<GenericUnion>(&value_)->selectBranch(branch); +#endif +} + +} // namespace avro +#endif // avro_GenericDatum_hh__ diff --git a/contrib/libs/apache/avro/api/Layout.hh b/contrib/libs/apache/avro/api/Layout.hh new file mode 100644 index 0000000000..ffd810c8b0 --- /dev/null +++ b/contrib/libs/apache/avro/api/Layout.hh @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Layout_hh__ +#define avro_Layout_hh__ + +#include <boost/noncopyable.hpp> +#include "Config.hh" + +/// \file Layout.hh +/// + +namespace avro { + +class AVRO_DECL Layout : private boost::noncopyable { + + protected: + + Layout(size_t offset = 0) : + offset_(offset) + {} + + public: + + size_t offset() const { + return offset_; + } + + virtual ~Layout() {} + + private: + + const size_t offset_; +}; + +class AVRO_DECL PrimitiveLayout : public Layout { + + public: + + PrimitiveLayout(size_t offset = 0) : + Layout(offset) + {} +}; + +class AVRO_DECL CompoundLayout : public Layout { + + public: + + CompoundLayout(size_t offset = 0) : + Layout(offset) + {} + + void add(std::unique_ptr<Layout> &layout) { + layouts_.push_back(std::move(layout)); + } + + const Layout &at (size_t idx) const { + return *layouts_.at(idx); + } + + private: + + std::vector<std::unique_ptr<Layout> > layouts_; +}; + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/LogicalType.hh b/contrib/libs/apache/avro/api/LogicalType.hh new file mode 100644 index 0000000000..33972788fb --- /dev/null +++ b/contrib/libs/apache/avro/api/LogicalType.hh @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_LogicalType_hh__ +#define avro_LogicalType_hh__ + +#include <iostream> + +#include "Config.hh" + +namespace avro { + +class AVRO_DECL LogicalType { + public: + enum Type { + NONE, + DECIMAL, + DATE, + TIME_MILLIS, + TIME_MICROS, + TIMESTAMP_MILLIS, + TIMESTAMP_MICROS, + DURATION, + UUID + }; + + explicit LogicalType(Type type); + + Type type() const; + + // Precision and scale can only be set for the DECIMAL logical type. + // Precision must be positive and scale must be either positive or zero. The + // setters will throw an exception if they are called on any type other + // than DECIMAL. + void setPrecision(int precision); + int precision() const { return precision_; } + void setScale(int scale); + int scale() const { return scale_; } + + void printJson(std::ostream& os) const; + + private: + Type type_; + int precision_; + int scale_; +}; + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/Node.hh b/contrib/libs/apache/avro/api/Node.hh new file mode 100644 index 0000000000..205b06f00e --- /dev/null +++ b/contrib/libs/apache/avro/api/Node.hh @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Node_hh__ +#define avro_Node_hh__ + +#include "Config.hh" + +#include <cassert> +#include <memory> +#include <boost/noncopyable.hpp> + +#include "Exception.hh" +#include "LogicalType.hh" +#include "Types.hh" +#include "SchemaResolution.hh" + +namespace avro { + +class Node; +class GenericDatum; + +typedef std::shared_ptr<Node> NodePtr; + +class AVRO_DECL Name { + std::string ns_; + std::string simpleName_; +public: + Name() { } + Name(const std::string& fullname); + Name(const std::string& simpleName, const std::string& ns) : ns_(ns), simpleName_(simpleName) { check(); } + + const std::string fullname() const; + const std::string& ns() const { return ns_; } + const std::string& simpleName() const { return simpleName_; } + + void ns(const std::string& n) { ns_ = n; } + void simpleName(const std::string& n) { simpleName_ = n; } + void fullname(const std::string& n); + + bool operator < (const Name& n) const; + void check() const; + bool operator == (const Name& n) const; + bool operator != (const Name& n) const { return !((*this) == n); } + void clear() { + ns_.clear(); + simpleName_.clear(); + } + operator std::string() const { + return fullname(); + } +}; + +inline +std::ostream& operator << (std::ostream& os, const Name& n) { + return os << n.fullname(); +} + +/// Node is the building block for parse trees. Each node represents an avro +/// type. Compound types have leaf nodes that represent the types they are +/// composed of. +/// +/// The user does not use the Node object directly, they interface with Schema +/// objects. +/// +/// The Node object uses reference-counted pointers. This is so that schemas +/// may be reused in other schemas, without needing to worry about memory +/// deallocation for nodes that are added to multiple schema parse trees. +/// +/// Node has minimal implementation, serving as an abstract base class for +/// different node types. +/// + +class AVRO_DECL Node : private boost::noncopyable +{ + public: + + Node(Type type) : + type_(type), + logicalType_(LogicalType::NONE), + locked_(false) + {} + + virtual ~Node(); + + Type type() const { + return type_; + } + + LogicalType logicalType() const { + return logicalType_; + } + + void setLogicalType(LogicalType logicalType); + + void lock() { + locked_ = true; + } + + bool locked() const { + return locked_; + } + + virtual bool hasName() const = 0; + + void setName(const Name &name) { + checkLock(); + checkName(name); + doSetName(name); + } + virtual const Name &name() const = 0; + + virtual const std::string &getDoc() const = 0; + void setDoc(const std::string &doc) { + checkLock(); + doSetDoc(doc); + } + + void addLeaf(const NodePtr &newLeaf) { + checkLock(); + doAddLeaf(newLeaf); + } + virtual size_t leaves() const = 0; + virtual const NodePtr& leafAt(int index) const = 0; + virtual const GenericDatum& defaultValueAt(int index) { + throw Exception(boost::format("No default value at: %1%") % index); + } + + void addName(const std::string &name) { + checkLock(); + checkName(name); + doAddName(name); + } + virtual size_t names() const = 0; + virtual const std::string &nameAt(int index) const = 0; + virtual bool nameIndex(const std::string &name, size_t &index) const = 0; + + void setFixedSize(int size) { + checkLock(); + doSetFixedSize(size); + } + virtual int fixedSize() const = 0; + + virtual bool isValid() const = 0; + + virtual SchemaResolution resolve(const Node &reader) const = 0; + + virtual void printJson(std::ostream &os, int depth) const = 0; + + virtual void printBasicInfo(std::ostream &os) const = 0; + + virtual void setLeafToSymbolic(int index, const NodePtr &node) = 0; + + // Serialize the default value GenericDatum g for the node contained + // in a record node. + virtual void printDefaultToJson(const GenericDatum& g, std::ostream &os, + int depth) const = 0; + + protected: + + void checkLock() const { + if(locked()) { + throw Exception("Cannot modify locked schema"); + } + } + + virtual void checkName(const Name &name) const { + name.check(); + } + + virtual void doSetName(const Name &name) = 0; + virtual void doSetDoc(const std::string &name) = 0; + + virtual void doAddLeaf(const NodePtr &newLeaf) = 0; + virtual void doAddName(const std::string &name) = 0; + virtual void doSetFixedSize(int size) = 0; + + private: + + const Type type_; + LogicalType logicalType_; + bool locked_; +}; + +} // namespace avro + +namespace std { +inline std::ostream& operator<<(std::ostream& os, const avro::Node& n) +{ + n.printJson(os, 0); + return os; +} +} + + +#endif diff --git a/contrib/libs/apache/avro/api/NodeConcepts.hh b/contrib/libs/apache/avro/api/NodeConcepts.hh new file mode 100644 index 0000000000..e914d925b6 --- /dev/null +++ b/contrib/libs/apache/avro/api/NodeConcepts.hh @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_NodeConcepts_hh__ +#define avro_NodeConcepts_hh__ + +#include "Config.hh" + +#include <vector> +#include <map> +#include "Exception.hh" + +namespace avro { + + +/// +/// The concept classes are used to simplify NodeImpl. Since different types +/// of avro types carry different attributes, such as names, or field names for +/// record members. Using the concept class of NoAttribute vs Attribute, the +/// NodeImpl object can enable/disable the attribute, but the code is the same +/// in either case. +/// +/// Furthermore, attributes may have different types, for example, most +/// attributes are strings, but fixed types have a size attribute, which is +/// integer. +/// +/// Since compound types are composed of other types, the leaf attribute +/// concepts extend a NodeImpl to include leaf nodes, and attributes for leaf +/// nodes, which are used to build parse trees. +/// +/// + +namespace concepts { + +template <typename Attribute> +struct NoAttribute +{ + static const bool hasAttribute = false; + + size_t size() const { + return 0; + } + + void add( const Attribute &) { + // There must be an add function for the generic NodeImpl, but the + // Node APIs ensure that it is never called, the throw here is + // just in case + throw Exception("This type does not have attribute"); + } + + const Attribute &get(size_t = 0) const { + // There must be an get function for the generic NodeImpl, but the + // Node APIs ensure that it is never called, the throw here is + // just in case + throw Exception("This type does not have attribute"); + // even though this code is unreachable the compiler requires it + static const Attribute empty = Attribute(); + return empty; + } + + Attribute &get(size_t = 0) { + // There must be an get function for the generic NodeImpl, but the + // Node APIs ensure that it is never called, the throw here is + // just in case + throw Exception("This type does not have attribute"); + } + +}; + +template<typename Attribute> +struct SingleAttribute +{ + static const bool hasAttribute = true; + + SingleAttribute() : attr_() + { } + + SingleAttribute(const Attribute& a) : attr_(a) { } + // copy constructing from another single attribute is allowed + SingleAttribute(const SingleAttribute<Attribute> &rhs) : + attr_(rhs.attr_) + { } + + // copy constructing from a no attribute is allowed + SingleAttribute(const NoAttribute<Attribute> &) : + attr_() + { } + + size_t size() const { + return 1; + } + + void add(const Attribute &attr) { + attr_ = attr; + } + + const Attribute &get(size_t index = 0) const { + if (index != 0) { + throw Exception("SingleAttribute has only 1 value"); + } + return attr_; + } + + Attribute &get(size_t index = 0) { + if (index != 0) { + throw Exception("SingleAttribute has only 1 value"); + } + return attr_; + } + +private: + template<typename T> friend struct MultiAttribute; + Attribute attr_; +}; + +template<typename Attribute> +struct MultiAttribute +{ + static const bool hasAttribute = true; + + MultiAttribute() + { } + + // copy constructing from another single attribute is allowed, it + // pushes the attribute + MultiAttribute(const SingleAttribute<Attribute> &rhs) + { + // since map is the only type that does this we know it's + // final size will be two, so reserve + attrs_.reserve(2); + attrs_.push_back(rhs.attr_); + } + + MultiAttribute(const MultiAttribute<Attribute> &rhs) : + attrs_(rhs.attrs_) + { } + + MultiAttribute(const NoAttribute<Attribute> &) + {} + + size_t size() const { + return attrs_.size(); + } + + void add(const Attribute &attr) { + attrs_.push_back(attr); + } + + const Attribute &get(size_t index = 0) const { + return attrs_.at(index); + } + + Attribute &get(size_t index) { + return attrs_.at(index); + } + + private: + + std::vector<Attribute> attrs_; +}; + + +template<typename T> +struct NameIndexConcept { + + bool lookup(const std::string &, size_t &) const { + throw Exception("Name index does not exist"); + return 0; + } + + bool add(const::std::string &, size_t) { + throw Exception("Name index does not exist"); + return false; + } +}; + +template<> +struct NameIndexConcept < MultiAttribute<std::string> > +{ + typedef std::map<std::string, size_t> IndexMap; + + bool lookup(const std::string &name, size_t &index) const { + IndexMap::const_iterator iter = map_.find(name); + if(iter == map_.end()) { + return false; + } + index = iter->second; + return true; + } + + bool add(const::std::string &name, size_t index) { + bool added = false; + IndexMap::iterator lb = map_.lower_bound(name); + if(lb == map_.end() || map_.key_comp()(name, lb->first)) { + map_.insert(lb, IndexMap::value_type(name, index)); + added = true; + } + return added; + } + + private: + + IndexMap map_; +}; + +} // namespace concepts +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/NodeImpl.hh b/contrib/libs/apache/avro/api/NodeImpl.hh new file mode 100644 index 0000000000..debce720a6 --- /dev/null +++ b/contrib/libs/apache/avro/api/NodeImpl.hh @@ -0,0 +1,619 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_NodeImpl_hh__ +#define avro_NodeImpl_hh__ + +#include "Config.hh" +#include "GenericDatum.hh" + +#include <limits> +#include <set> +#include <iomanip> +#include <iostream> +#include <sstream> +#include <memory> + +#include "Node.hh" +#include "NodeConcepts.hh" + +namespace avro { + +/// Implementation details for Node. NodeImpl represents all the avro types, +/// whose properties are enabled and disabled by selecting concept classes. + +template +< + class NameConcept, + class LeavesConcept, + class LeafNamesConcept, + class SizeConcept +> +class NodeImpl : public Node +{ + + protected: + + NodeImpl(Type type) : + Node(type), + nameAttribute_(), + docAttribute_(), + leafAttributes_(), + leafNameAttributes_(), + sizeAttribute_() + { } + + NodeImpl(Type type, + const NameConcept &name, + const LeavesConcept &leaves, + const LeafNamesConcept &leafNames, + const SizeConcept &size) : + Node(type), + nameAttribute_(name), + docAttribute_(), + leafAttributes_(leaves), + leafNameAttributes_(leafNames), + sizeAttribute_(size) + { } + + // Ctor with "doc" + NodeImpl(Type type, + const NameConcept &name, + const concepts::SingleAttribute<std::string> &doc, + const LeavesConcept &leaves, + const LeafNamesConcept &leafNames, + const SizeConcept &size) : + Node(type), + nameAttribute_(name), + docAttribute_(doc), + leafAttributes_(leaves), + leafNameAttributes_(leafNames), + sizeAttribute_(size) + {} + + void swap(NodeImpl& impl) { + std::swap(nameAttribute_, impl.nameAttribute_); + std::swap(docAttribute_, impl.docAttribute_); + std::swap(leafAttributes_, impl.leafAttributes_); + std::swap(leafNameAttributes_, impl.leafNameAttributes_); + std::swap(sizeAttribute_, impl.sizeAttribute_); + std::swap(nameIndex_, impl.nameIndex_); + } + + bool hasName() const { + // e.g.: true for single and multiattributes, false for noattributes. + return NameConcept::hasAttribute; + } + + void doSetName(const Name &name) { + nameAttribute_.add(name); + } + + const Name &name() const { + return nameAttribute_.get(); + } + + void doSetDoc(const std::string &doc) { + docAttribute_.add(doc); + } + + const std::string &getDoc() const { + return docAttribute_.get(); + } + + void doAddLeaf(const NodePtr &newLeaf) { + leafAttributes_.add(newLeaf); + } + + size_t leaves() const { + return leafAttributes_.size(); + } + + const NodePtr &leafAt(int index) const { + return leafAttributes_.get(index); + } + + void doAddName(const std::string &name) { + if (! nameIndex_.add(name, leafNameAttributes_.size())) { + throw Exception(boost::format("Cannot add duplicate name: %1%") % name); + } + leafNameAttributes_.add(name); + } + + size_t names() const { + return leafNameAttributes_.size(); + } + + const std::string &nameAt(int index) const { + return leafNameAttributes_.get(index); + } + + bool nameIndex(const std::string &name, size_t &index) const { + return nameIndex_.lookup(name, index); + } + + void doSetFixedSize(int size) { + sizeAttribute_.add(size); + } + + int fixedSize() const { + return sizeAttribute_.get(); + } + + virtual bool isValid() const = 0; + + void printBasicInfo(std::ostream &os) const; + + void setLeafToSymbolic(int index, const NodePtr &node); + + SchemaResolution furtherResolution(const Node &reader) const { + SchemaResolution match = RESOLVE_NO_MATCH; + + if (reader.type() == AVRO_SYMBOLIC) { + + // resolve the symbolic type, and check again + const NodePtr &node = reader.leafAt(0); + match = resolve(*node); + } + else if(reader.type() == AVRO_UNION) { + + // in this case, need to see if there is an exact match for the + // writer's type, or if not, the first one that can be promoted to a + // match + + for(size_t i= 0; i < reader.leaves(); ++i) { + + const NodePtr &node = reader.leafAt(i); + SchemaResolution thisMatch = resolve(*node); + + // if matched then the search is done + if(thisMatch == RESOLVE_MATCH) { + match = thisMatch; + break; + } + + // thisMatch is either no match, or promotable, this will set match to + // promotable if it hasn't been set already + if (match == RESOLVE_NO_MATCH) { + match = thisMatch; + } + } + } + + return match; + } + + NameConcept nameAttribute_; + + // Rem: NameConcept type is HasName (= SingleAttribute<Name>), we use std::string instead + concepts::SingleAttribute<std::string> docAttribute_; /** Doc used to compare schemas */ + + LeavesConcept leafAttributes_; + LeafNamesConcept leafNameAttributes_; + SizeConcept sizeAttribute_; + concepts::NameIndexConcept<LeafNamesConcept> nameIndex_; +}; + +typedef concepts::NoAttribute<Name> NoName; +typedef concepts::SingleAttribute<Name> HasName; + +typedef concepts::SingleAttribute<std::string> HasDoc; + +typedef concepts::NoAttribute<NodePtr> NoLeaves; +typedef concepts::SingleAttribute<NodePtr> SingleLeaf; +typedef concepts::MultiAttribute<NodePtr> MultiLeaves; + +typedef concepts::NoAttribute<std::string> NoLeafNames; +typedef concepts::MultiAttribute<std::string> LeafNames; + +typedef concepts::NoAttribute<int> NoSize; +typedef concepts::SingleAttribute<int> HasSize; + +typedef NodeImpl< NoName, NoLeaves, NoLeafNames, NoSize > NodeImplPrimitive; +typedef NodeImpl< HasName, NoLeaves, NoLeafNames, NoSize > NodeImplSymbolic; + +typedef NodeImpl< HasName, MultiLeaves, LeafNames, NoSize > NodeImplRecord; +typedef NodeImpl< HasName, NoLeaves, LeafNames, NoSize > NodeImplEnum; +typedef NodeImpl< NoName, SingleLeaf, NoLeafNames, NoSize > NodeImplArray; +typedef NodeImpl< NoName, MultiLeaves, NoLeafNames, NoSize > NodeImplMap; +typedef NodeImpl< NoName, MultiLeaves, NoLeafNames, NoSize > NodeImplUnion; +typedef NodeImpl< HasName, NoLeaves, NoLeafNames, HasSize > NodeImplFixed; + +class AVRO_DECL NodePrimitive : public NodeImplPrimitive +{ + public: + + explicit NodePrimitive(Type type) : + NodeImplPrimitive(type) + { } + + SchemaResolution resolve(const Node &reader) const; + + void printJson(std::ostream &os, int depth) const; + + bool isValid() const { + return true; + } + + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; +}; + +class AVRO_DECL NodeSymbolic : public NodeImplSymbolic +{ + typedef std::weak_ptr<Node> NodeWeakPtr; + + public: + + NodeSymbolic() : + NodeImplSymbolic(AVRO_SYMBOLIC) + { } + + explicit NodeSymbolic(const HasName &name) : + NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoSize()) + { } + + NodeSymbolic(const HasName &name, const NodePtr n) : + NodeImplSymbolic(AVRO_SYMBOLIC, name, NoLeaves(), NoLeafNames(), NoSize()), actualNode_(n) + { } + SchemaResolution resolve(const Node &reader) const; + + void printJson(std::ostream &os, int depth) const; + + bool isValid() const { + return (nameAttribute_.size() == 1); + } + + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; + + bool isSet() const { + return (actualNode_.lock() != 0); + } + + NodePtr getNode() const { + NodePtr node = actualNode_.lock(); + if(!node) { + throw Exception(boost::format("Could not follow symbol %1%") % name()); + } + return node; + } + + void setNode(const NodePtr &node) { + actualNode_ = node; + } + + protected: + + NodeWeakPtr actualNode_; + +}; + +class AVRO_DECL NodeRecord : public NodeImplRecord { + std::vector<GenericDatum> defaultValues; +public: + NodeRecord() : NodeImplRecord(AVRO_RECORD) { } + NodeRecord(const HasName &name, const MultiLeaves &fields, + const LeafNames &fieldsNames, + const std::vector<GenericDatum>& dv) : + NodeImplRecord(AVRO_RECORD, name, fields, fieldsNames, NoSize()), + defaultValues(dv) { + for (size_t i = 0; i < leafNameAttributes_.size(); ++i) { + if (!nameIndex_.add(leafNameAttributes_.get(i), i)) { + throw Exception(boost::format( + "Cannot add duplicate field: %1%") % + leafNameAttributes_.get(i)); + } + } + } + + NodeRecord(const HasName &name, const HasDoc &doc, const MultiLeaves &fields, + const LeafNames &fieldsNames, + const std::vector<GenericDatum> &dv) : + NodeImplRecord(AVRO_RECORD, name, doc, fields, fieldsNames, NoSize()), + defaultValues(dv) { + for (size_t i = 0; i < leafNameAttributes_.size(); ++i) { + if (!nameIndex_.add(leafNameAttributes_.get(i), i)) { + throw Exception(boost::format( + "Cannot add duplicate field: %1%") % + leafNameAttributes_.get(i)); + } + } + } + + void swap(NodeRecord& r) { + NodeImplRecord::swap(r); + defaultValues.swap(r.defaultValues); + } + + SchemaResolution resolve(const Node &reader) const; + + void printJson(std::ostream &os, int depth) const; + + bool isValid() const { + return ((nameAttribute_.size() == 1) && + (leafAttributes_.size() == leafNameAttributes_.size())); + } + + const GenericDatum& defaultValueAt(int index) { + return defaultValues[index]; + } + + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; +}; + +class AVRO_DECL NodeEnum : public NodeImplEnum +{ + public: + + NodeEnum() : + NodeImplEnum(AVRO_ENUM) + { } + + NodeEnum(const HasName &name, const LeafNames &symbols) : + NodeImplEnum(AVRO_ENUM, name, NoLeaves(), symbols, NoSize()) + { + for(size_t i=0; i < leafNameAttributes_.size(); ++i) { + if(!nameIndex_.add(leafNameAttributes_.get(i), i)) { + throw Exception(boost::format("Cannot add duplicate enum: %1%") % leafNameAttributes_.get(i)); + } + } + } + + SchemaResolution resolve(const Node &reader) const; + + void printJson(std::ostream &os, int depth) const; + + bool isValid() const { + return ( + (nameAttribute_.size() == 1) && + (leafNameAttributes_.size() > 0) + ); + } + + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; +}; + +class AVRO_DECL NodeArray : public NodeImplArray +{ + public: + + NodeArray() : + NodeImplArray(AVRO_ARRAY) + { } + + explicit NodeArray(const SingleLeaf &items) : + NodeImplArray(AVRO_ARRAY, NoName(), items, NoLeafNames(), NoSize()) + { } + + SchemaResolution resolve(const Node &reader) const; + + void printJson(std::ostream &os, int depth) const; + + bool isValid() const { + return (leafAttributes_.size() == 1); + } + + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; +}; + +class AVRO_DECL NodeMap : public NodeImplMap +{ + public: + + NodeMap() : + NodeImplMap(AVRO_MAP) + { + NodePtr key(new NodePrimitive(AVRO_STRING)); + doAddLeaf(key); + } + + explicit NodeMap(const SingleLeaf &values) : + NodeImplMap(AVRO_MAP, NoName(), values, NoLeafNames(), NoSize()) + { + // need to add the key for the map too + NodePtr key(new NodePrimitive(AVRO_STRING)); + doAddLeaf(key); + + // key goes before value + std::swap(leafAttributes_.get(0), leafAttributes_.get(1)); + } + + SchemaResolution resolve(const Node &reader) const; + + void printJson(std::ostream &os, int depth) const; + + bool isValid() const { + return (leafAttributes_.size() == 2); + } + + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; +}; + +class AVRO_DECL NodeUnion : public NodeImplUnion +{ + public: + + NodeUnion() : + NodeImplUnion(AVRO_UNION) + { } + + explicit NodeUnion(const MultiLeaves &types) : + NodeImplUnion(AVRO_UNION, NoName(), types, NoLeafNames(), NoSize()) + { } + + SchemaResolution resolve(const Node &reader) const; + + void printJson(std::ostream &os, int depth) const; + + bool isValid() const { + std::set<std::string> seen; + if (leafAttributes_.size() >= 1) { + for (size_t i = 0; i < leafAttributes_.size(); ++i) { + std::string name; + const NodePtr& n = leafAttributes_.get(i); + switch (n->type()) { + case AVRO_STRING: + name = "string"; + break; + case AVRO_BYTES: + name = "bytes"; + break; + case AVRO_INT: + name = "int"; + break; + case AVRO_LONG: + name = "long"; + break; + case AVRO_FLOAT: + name = "float"; + break; + case AVRO_DOUBLE: + name = "double"; + break; + case AVRO_BOOL: + name = "bool"; + break; + case AVRO_NULL: + name = "null"; + break; + case AVRO_ARRAY: + name = "array"; + break; + case AVRO_MAP: + name = "map"; + break; + case AVRO_RECORD: + case AVRO_ENUM: + case AVRO_UNION: + case AVRO_FIXED: + case AVRO_SYMBOLIC: + name = n->name().fullname(); + break; + default: + return false; + } + if (seen.find(name) != seen.end()) { + return false; + } + seen.insert(name); + } + return true; + } + return false; + } + + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; +}; + +class AVRO_DECL NodeFixed : public NodeImplFixed +{ + public: + + NodeFixed() : + NodeImplFixed(AVRO_FIXED) + { } + + NodeFixed(const HasName &name, const HasSize &size) : + NodeImplFixed(AVRO_FIXED, name, NoLeaves(), NoLeafNames(), size) + { } + + SchemaResolution resolve(const Node &reader) const; + + void printJson(std::ostream &os, int depth) const; + + bool isValid() const { + return ( + (nameAttribute_.size() == 1) && + (sizeAttribute_.size() == 1) + ); + } + + void printDefaultToJson(const GenericDatum& g, std::ostream &os, int depth) const; +}; + +template < class A, class B, class C, class D > +inline void +NodeImpl<A,B,C,D>::setLeafToSymbolic(int index, const NodePtr &node) +{ + if(!B::hasAttribute) { + throw Exception("Cannot change leaf node for nonexistent leaf"); + } + + NodePtr &replaceNode = const_cast<NodePtr &>(leafAttributes_.get(index)); + if(replaceNode->name() != node->name()) { + throw Exception("Symbolic name does not match the name of the schema it references"); + } + + NodePtr symbol(new NodeSymbolic); + NodeSymbolic *ptr = static_cast<NodeSymbolic *> (symbol.get()); + + ptr->setName(node->name()); + ptr->setNode(node); + replaceNode.swap(symbol); +} + +template < class A, class B, class C, class D > +inline void +NodeImpl<A,B,C,D>::printBasicInfo(std::ostream &os) const +{ + os << type(); + if(hasName()) { + os << ' ' << nameAttribute_.get(); + } + + if(D::hasAttribute) { + os << " " << sizeAttribute_.get(); + } + os << '\n'; + int count = leaves(); + count = count ? count : names(); + for(int i= 0; i < count; ++i) { + if( C::hasAttribute ) { + os << "name " << nameAt(i) << '\n'; + } + if( type() != AVRO_SYMBOLIC && leafAttributes_.hasAttribute) { + leafAt(i)->printBasicInfo(os); + } + } + if(isCompound(type())) { + os << "end " << type() << '\n'; + } +} + + +inline NodePtr resolveSymbol(const NodePtr &node) +{ + if(node->type() != AVRO_SYMBOLIC) { + throw Exception("Only symbolic nodes may be resolved"); + } + std::shared_ptr<NodeSymbolic> symNode = std::static_pointer_cast<NodeSymbolic>(node); + return symNode->getNode(); +} + +template< typename T > +inline std::string intToHex(T i) +{ + std::stringstream stream; + stream << "\\u" + << std::setfill('0') << std::setw(sizeof(T)) + << std::hex << i; + return stream.str(); +} + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/Parser.hh b/contrib/libs/apache/avro/api/Parser.hh new file mode 100644 index 0000000000..fdf28fb0d5 --- /dev/null +++ b/contrib/libs/apache/avro/api/Parser.hh @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Parser_hh__ +#define avro_Parser_hh__ + +#include "Config.hh" +#include "Reader.hh" + +#include <array> + +namespace avro { + +/// +/// Class that wraps a reader or ValidatingReade with an interface that uses +/// explicit get* names instead of getValue +/// + +template<class Reader> +class Parser : private boost::noncopyable +{ + + public: + + // Constructor only works with Writer + explicit Parser(const InputBuffer &in) : + reader_(in) + {} + + /// Constructor only works with ValidatingWriter + Parser(const ValidSchema &schema, const InputBuffer &in) : + reader_(schema, in) + {} + + void readNull() { + Null null; + reader_.readValue(null); + } + + bool readBool() { + bool val; + reader_.readValue(val); + return val; + } + + int32_t readInt() { + int32_t val; + reader_.readValue(val); + return val; + } + + int64_t readLong() { + int64_t val; + reader_.readValue(val); + return val; + } + + float readFloat() { + float val; + reader_.readValue(val); + return val; + } + + double readDouble() { + double val; + reader_.readValue(val); + return val; + } + + void readString(std::string &val) { + reader_.readValue(val); + } + + void readBytes(std::vector<uint8_t> &val) { + reader_.readBytes(val); + } + + template <size_t N> + void readFixed(uint8_t (&val)[N]) { + reader_.readFixed(val); + } + + template<size_t N> + void readFixed(std::array<uint8_t, N> &val) { + reader_.readFixed(val); + } + + void readRecord() { + reader_.readRecord(); + } + + void readRecordEnd() { + reader_.readRecordEnd(); + } + + int64_t readArrayBlockSize() { + return reader_.readArrayBlockSize(); + } + + int64_t readUnion() { + return reader_.readUnion(); + } + + int64_t readEnum() { + return reader_.readEnum(); + } + + int64_t readMapBlockSize() { + return reader_.readMapBlockSize(); + } + + private: + + friend Type nextType(Parser<ValidatingReader> &p); + friend bool currentRecordName(Parser<ValidatingReader> &p, std::string &name); + friend bool nextFieldName(Parser<ValidatingReader> &p, std::string &name); + + Reader reader_; + +}; + +inline Type nextType(Parser<ValidatingReader> &p) { + return p.reader_.nextType(); +} + +inline bool currentRecordName(Parser<ValidatingReader> &p, std::string &name) { + return p.reader_.currentRecordName(name); +} + +inline bool nextFieldName(Parser<ValidatingReader> &p, std::string &name) { + return p.reader_.nextFieldName(name); +} + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/Reader.hh b/contrib/libs/apache/avro/api/Reader.hh new file mode 100644 index 0000000000..4f514fbbe9 --- /dev/null +++ b/contrib/libs/apache/avro/api/Reader.hh @@ -0,0 +1,209 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Reader_hh__ +#define avro_Reader_hh__ + +#include <stdint.h> +#include <vector> +#include <array> +#include <boost/noncopyable.hpp> + +#include "Config.hh" +#include "Zigzag.hh" +#include "Types.hh" +#include "Validator.hh" +#include "buffer/BufferReader.hh" + +namespace avro { + +/// +/// Parses from an avro encoding to the requested type. Assumes the next item +/// in the avro binary data is the expected type. +/// + +template<class ValidatorType> +class ReaderImpl : private boost::noncopyable +{ + + public: + + explicit ReaderImpl(const InputBuffer &buffer) : + reader_(buffer) + {} + + ReaderImpl(const ValidSchema &schema, const InputBuffer &buffer) : + validator_(schema), + reader_(buffer) + {} + + void readValue(Null &) { + validator_.checkTypeExpected(AVRO_NULL); + } + + void readValue(bool &val) { + validator_.checkTypeExpected(AVRO_BOOL); + uint8_t ival = 0; + reader_.read(ival); + val = (ival != 0); + } + + void readValue(int32_t &val) { + validator_.checkTypeExpected(AVRO_INT); + uint32_t encoded = static_cast<uint32_t>(readVarInt()); + val = decodeZigzag32(encoded); + } + + void readValue(int64_t &val) { + validator_.checkTypeExpected(AVRO_LONG); + uint64_t encoded = readVarInt(); + val = decodeZigzag64(encoded); + } + + void readValue(float &val) { + validator_.checkTypeExpected(AVRO_FLOAT); + union { + float f; + uint32_t i; + } v; + reader_.read(v.i); + val = v.f; + } + + void readValue(double &val) { + validator_.checkTypeExpected(AVRO_DOUBLE); + union { + double d; + uint64_t i; + } v; + reader_.read(v.i); + val = v.d; + } + + void readValue(std::string &val) { + validator_.checkTypeExpected(AVRO_STRING); + size_t size = static_cast<size_t>(readSize()); + reader_.read(val, size); + } + + void readBytes(std::vector<uint8_t> &val) { + validator_.checkTypeExpected(AVRO_BYTES); + size_t size = static_cast<size_t>(readSize()); + val.resize(size); + reader_.read(reinterpret_cast<char *>(val.data()), size); + } + + void readFixed(uint8_t *val, size_t size) { + validator_.checkFixedSizeExpected(size); + reader_.read(reinterpret_cast<char *>(val), size); + } + + template <size_t N> + void readFixed(uint8_t (&val)[N]) { + this->readFixed(val, N); + } + + template <size_t N> + void readFixed(std::array<uint8_t, N> &val) { + this->readFixed(val.data(), N); + } + + void readRecord() { + validator_.checkTypeExpected(AVRO_RECORD); + validator_.checkTypeExpected(AVRO_LONG); + validator_.setCount(1); + } + + void readRecordEnd() { + validator_.checkTypeExpected(AVRO_RECORD); + validator_.checkTypeExpected(AVRO_LONG); + validator_.setCount(0); + } + + int64_t readArrayBlockSize() { + validator_.checkTypeExpected(AVRO_ARRAY); + return readCount(); + } + + int64_t readUnion() { + validator_.checkTypeExpected(AVRO_UNION); + return readCount(); + } + + int64_t readEnum() { + validator_.checkTypeExpected(AVRO_ENUM); + return readCount(); + } + + int64_t readMapBlockSize() { + validator_.checkTypeExpected(AVRO_MAP); + return readCount(); + } + + Type nextType() const { + return validator_.nextTypeExpected(); + } + + bool currentRecordName(std::string &name) const { + return validator_.getCurrentRecordName(name); + } + + bool nextFieldName(std::string &name) const { + return validator_.getNextFieldName(name); + } + + private: + + uint64_t readVarInt() { + uint64_t encoded = 0; + uint8_t val = 0; + int shift = 0; + do { + reader_.read(val); + uint64_t newbits = static_cast<uint64_t>(val & 0x7f) << shift; + encoded |= newbits; + shift += 7; + } while (val & 0x80); + + return encoded; + } + + int64_t readSize() { + uint64_t encoded = readVarInt(); + int64_t size = decodeZigzag64(encoded); + return size; + } + + int64_t readCount() { + validator_.checkTypeExpected(AVRO_LONG); + int64_t count = readSize(); + validator_.setCount(count); + return count; + } + + ValidatorType validator_; + BufferReader reader_; + +}; + +typedef ReaderImpl<NullValidator> Reader; +typedef ReaderImpl<Validator> ValidatingReader; + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/Resolver.hh b/contrib/libs/apache/avro/api/Resolver.hh new file mode 100644 index 0000000000..a0ffcbeac7 --- /dev/null +++ b/contrib/libs/apache/avro/api/Resolver.hh @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Resolver_hh__ +#define avro_Resolver_hh__ + +#include <memory> +#include <boost/noncopyable.hpp> +#include <stdint.h> + +#include "Config.hh" +#include "Reader.hh" + +/// \file Resolver.hh +/// + +namespace avro { + +class ValidSchema; +class Layout; + +class AVRO_DECL Resolver : private boost::noncopyable +{ + + public: + + virtual void parse(Reader &reader, uint8_t *address) const = 0; + virtual ~Resolver() {} + +}; + +std::unique_ptr<Resolver> constructResolver( + const ValidSchema &rwriterSchema, + const ValidSchema &readerSchema, + const Layout &readerLayout + ); + + + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/ResolverSchema.hh b/contrib/libs/apache/avro/api/ResolverSchema.hh new file mode 100644 index 0000000000..9048a22b9a --- /dev/null +++ b/contrib/libs/apache/avro/api/ResolverSchema.hh @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_ResolverSchema_hh__ +#define avro_ResolverSchema_hh__ + +#include <boost/noncopyable.hpp> +#include <stdint.h> +#include <memory> + +#include "Config.hh" +#include "Reader.hh" + +/// \file ResolverSchema.hh +/// + +namespace avro { + +class ValidSchema; +class Layout; +class Resolver; + +class AVRO_DECL ResolverSchema { + + public: + + ResolverSchema(const ValidSchema &writer, const ValidSchema &reader, const Layout &readerLayout); + + private: + + friend class ResolvingReader; + + void parse(Reader &reader, uint8_t *address); + + std::shared_ptr<Resolver> resolver_; + +}; + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/ResolvingReader.hh b/contrib/libs/apache/avro/api/ResolvingReader.hh new file mode 100644 index 0000000000..806e64da56 --- /dev/null +++ b/contrib/libs/apache/avro/api/ResolvingReader.hh @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_ResolvingReader_hh__ +#define avro_ResolvingReader_hh__ + +#include <stdint.h> +#include <boost/noncopyable.hpp> + +#include "Config.hh" +#include "ResolverSchema.hh" +#include "Reader.hh" + +namespace avro { + +class AVRO_DECL ResolvingReader : private boost::noncopyable +{ + + public: + + ResolvingReader(const ResolverSchema &schema, const InputBuffer &in) : + reader_(in), + schema_(schema) + {} + + template<typename T> + void parse(T &object) { + schema_.parse(reader_, reinterpret_cast<uint8_t *>(&object)); + } + + private: + + Reader reader_; + ResolverSchema schema_; +}; + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/Schema.hh b/contrib/libs/apache/avro/api/Schema.hh new file mode 100644 index 0000000000..b8ad92c825 --- /dev/null +++ b/contrib/libs/apache/avro/api/Schema.hh @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Schema_hh__ +#define avro_Schema_hh__ + +#include "Config.hh" +#include "NodeImpl.hh" +#include <string> + +/// \file +/// +/// Schemas for representing all the avro types. The compound schema objects +/// allow composition from other schemas. +/// + +namespace avro { + + +/// The root Schema object is a base class. Nobody constructs this class directly. + +class AVRO_DECL Schema { +public: + + virtual ~Schema(); + + Type type() const { + return node_->type(); + } + + const NodePtr &root() const { + return node_; + } + + NodePtr &root() { + return node_; + } + + protected: + Schema(); + explicit Schema(const NodePtr &node); + explicit Schema(Node *node); + + NodePtr node_; +}; + +class AVRO_DECL NullSchema : public Schema { +public: + NullSchema(): Schema(new NodePrimitive(AVRO_NULL)) {} +}; + +class AVRO_DECL BoolSchema : public Schema { +public: + BoolSchema(): Schema(new NodePrimitive(AVRO_BOOL)) {} +}; + +class AVRO_DECL IntSchema : public Schema { +public: + IntSchema(): Schema(new NodePrimitive(AVRO_INT)) {} +}; + +class AVRO_DECL LongSchema : public Schema { +public: + LongSchema(): Schema(new NodePrimitive(AVRO_LONG)) {} +}; + +class AVRO_DECL FloatSchema : public Schema { +public: + FloatSchema(): Schema(new NodePrimitive(AVRO_FLOAT)) {} +}; + +class AVRO_DECL DoubleSchema : public Schema { +public: + DoubleSchema(): Schema(new NodePrimitive(AVRO_DOUBLE)) {} +}; + +class AVRO_DECL StringSchema : public Schema { +public: + StringSchema(): Schema(new NodePrimitive(AVRO_STRING)) {} +}; + +class AVRO_DECL BytesSchema : public Schema { +public: + BytesSchema(): Schema(new NodePrimitive(AVRO_BYTES)) {} +}; + +class AVRO_DECL RecordSchema : public Schema { +public: + RecordSchema(const std::string &name); + void addField(const std::string &name, const Schema &fieldSchema); + + std::string getDoc() const; + void setDoc(const std::string &); +}; + +class AVRO_DECL EnumSchema : public Schema { +public: + EnumSchema(const std::string &name); + void addSymbol(const std::string &symbol); +}; + +class AVRO_DECL ArraySchema : public Schema { +public: + ArraySchema(const Schema &itemsSchema); + ArraySchema(const ArraySchema &itemsSchema); +}; + +class AVRO_DECL MapSchema : public Schema { +public: + MapSchema(const Schema &valuesSchema); + MapSchema(const MapSchema &itemsSchema); +}; + +class AVRO_DECL UnionSchema : public Schema { +public: + UnionSchema(); + void addType(const Schema &typeSchema); +}; + +class AVRO_DECL FixedSchema : public Schema { +public: + FixedSchema(int size, const std::string &name); +}; + +class AVRO_DECL SymbolicSchema : public Schema { +public: + SymbolicSchema(const Name& name, const NodePtr& link); +}; +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/SchemaResolution.hh b/contrib/libs/apache/avro/api/SchemaResolution.hh new file mode 100644 index 0000000000..765347d9de --- /dev/null +++ b/contrib/libs/apache/avro/api/SchemaResolution.hh @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_SchemaResolution_hh__ +#define avro_SchemaResolution_hh__ + +#include "Config.hh" + +namespace avro { + + +enum SchemaResolution { + + /// The schemas definitely do not match + + RESOLVE_NO_MATCH, + + /// The schemas match at a cursory level + /// + /// For records and enums, this means the name is the same, but it does not + /// necessarily mean that every symbol or field is an exact match. + + RESOLVE_MATCH, + + /// For primitives, the matching may occur if the type is promotable. This means that the + /// writer matches reader if the writer's type is promoted the specified type. + + //@{ + + RESOLVE_PROMOTABLE_TO_LONG, + RESOLVE_PROMOTABLE_TO_FLOAT, + RESOLVE_PROMOTABLE_TO_DOUBLE, + + //@} + +}; + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/Serializer.hh b/contrib/libs/apache/avro/api/Serializer.hh new file mode 100644 index 0000000000..15a8878586 --- /dev/null +++ b/contrib/libs/apache/avro/api/Serializer.hh @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Serializer_hh__ +#define avro_Serializer_hh__ + +#include <array> +#include <boost/noncopyable.hpp> + +#include "Config.hh" +#include "Writer.hh" + +namespace avro { + +/// Class that wraps a Writer or ValidatingWriter with an interface that uses +/// explicit write* names instead of writeValue + +template<class Writer> +class Serializer : private boost::noncopyable +{ + + public: + + /// Constructor only works with Writer + explicit Serializer() : + writer_() + {} + + /// Constructor only works with ValidatingWriter + Serializer(const ValidSchema &schema) : + writer_(schema) + {} + + void writeNull() { + writer_.writeValue(Null()); + } + + void writeBool(bool val) { + writer_.writeValue(val); + } + + void writeInt(int32_t val) { + writer_.writeValue(val); + } + + void writeLong(int64_t val) { + writer_.writeValue(val); + } + + void writeFloat(float val) { + writer_.writeValue(val); + } + + void writeDouble(double val) { + writer_.writeValue(val); + } + + void writeBytes(const void *val, size_t size) { + writer_.writeBytes(val); + } + + template <size_t N> + void writeFixed(const uint8_t (&val)[N]) { + writer_.writeFixed(val); + } + + template <size_t N> + void writeFixed(const std::array<uint8_t, N> &val) { + writer_.writeFixed(val); + } + + void writeString(const std::string &val) { + writer_.writeValue(val); + } + + void writeRecord() { + writer_.writeRecord(); + } + + void writeRecordEnd() { + writer_.writeRecordEnd(); + } + + void writeArrayBlock(int64_t size) { + writer_.writeArrayBlock(size); + } + + void writeArrayEnd() { + writer_.writeArrayEnd(); + } + + void writeMapBlock(int64_t size) { + writer_.writeMapBlock(size); + } + + void writeMapEnd() { + writer_.writeMapEnd(); + } + + void writeUnion(int64_t choice) { + writer_.writeUnion(choice); + } + + void writeEnum(int64_t choice) { + writer_.writeEnum(choice); + } + + InputBuffer buffer() const { + return writer_.buffer(); + } + + private: + + Writer writer_; + +}; + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/Specific.hh b/contrib/libs/apache/avro/api/Specific.hh new file mode 100644 index 0000000000..53741be480 --- /dev/null +++ b/contrib/libs/apache/avro/api/Specific.hh @@ -0,0 +1,348 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Codec_hh__ +#define avro_Codec_hh__ + +#include <string> +#include <vector> +#include <map> +#include <algorithm> +#include "array" + +#include "boost/blank.hpp" + +#include "AvroTraits.hh" +#include "Config.hh" +#include "Encoder.hh" +#include "Decoder.hh" + +/** + * A bunch of templates and specializations for encoding and decoding + * specific types. + * + * Primitive AVRO types BOOLEAN, INT, LONG, FLOAT, DOUBLE, STRING and BYTES + * get decoded to and encoded from C++ types bool, int32_t, int64_t, float, + * double, std::string and std::vector<uint8_t> respectively. In addition, + * std::vector<T> for aribtrary type T gets encoded as an Avro array of T. + * Similarly, std::map<std::string, T> for arbitrary type T gets encoded + * as an Avro map with value type T. + * + * Users can have their custom types encoded/decoded by specializing + * avro::codec_traits class for their types. + */ +namespace avro { + +typedef boost::blank null; + +template <typename T> void encode(Encoder& e, const T& t); +template <typename T> void decode(Decoder& d, T& t); + +/** + * Codec_traits tells avro how to encode and decode an object of given type. + * + * The class is expected to have two static methods: + * \li static void encode(Encoder& e, const T& value); + * \li static void decode(Decoder& e, T& value); + * The default is empty. + */ +template <typename T> +struct codec_traits; + +/** + * codec_traits for Avro boolean. + */ +template <> struct codec_traits<bool> { + /** + * Encodes a given value. + */ + static void encode(Encoder& e, bool b) { + e.encodeBool(b); + } + + /** + * Decodes into a given value. + */ + static void decode(Decoder& d, bool& b) { + b = d.decodeBool(); + } +}; + +/** + * codec_traits for Avro int. + */ +template <> struct codec_traits<int32_t> { + /** + * Encodes a given value. + */ + static void encode(Encoder& e, int32_t i) { + e.encodeInt(i); + } + + /** + * Decodes into a given value. + */ + static void decode(Decoder& d, int32_t& i) { + i = d.decodeInt(); + } +}; + +/** + * codec_traits for Avro long. + */ +template <> struct codec_traits<int64_t> { + /** + * Encodes a given value. + */ + static void encode(Encoder& e, int64_t l) { + e.encodeLong(l); + } + + /** + * Decodes into a given value. + */ + static void decode(Decoder& d, int64_t& l) { + l = d.decodeLong(); + } +}; + +/** + * codec_traits for Avro float. + */ +template <> struct codec_traits<float> { + /** + * Encodes a given value. + */ + static void encode(Encoder& e, float f) { + e.encodeFloat(f); + } + + /** + * Decodes into a given value. + */ + static void decode(Decoder& d, float& f) { + f = d.decodeFloat(); + } +}; + +/** + * codec_traits for Avro double. + */ +template <> struct codec_traits<double> { + /** + * Encodes a given value. + */ + static void encode(Encoder& e, double d) { + e.encodeDouble(d); + } + + /** + * Decodes into a given value. + */ + static void decode(Decoder& d, double& dbl) { + dbl = d.decodeDouble(); + } +}; + +/** + * codec_traits for Avro string. + */ +template <> struct codec_traits<std::string> { + /** + * Encodes a given value. + */ + static void encode(Encoder& e, const std::string& s) { + e.encodeString(s); + } + + /** + * Decodes into a given value. + */ + static void decode(Decoder& d, std::string& s) { + s = d.decodeString(); + } +}; + +/** + * codec_traits for Avro bytes. + */ +template <> struct codec_traits<std::vector<uint8_t> > { + /** + * Encodes a given value. + */ + static void encode(Encoder& e, const std::vector<uint8_t>& b) { + e.encodeBytes(b); + } + + /** + * Decodes into a given value. + */ + static void decode(Decoder& d, std::vector<uint8_t>& s) { + d.decodeBytes(s); + } +}; + +/** + * codec_traits for Avro fixed. + */ +template <size_t N> struct codec_traits<std::array<uint8_t, N> > { + /** + * Encodes a given value. + */ + static void encode(Encoder& e, const std::array<uint8_t, N>& b) { + e.encodeFixed(b.data(), N); + } + + /** + * Decodes into a given value. + */ + static void decode(Decoder& d, std::array<uint8_t, N>& s) { + std::vector<uint8_t> v(N); + d.decodeFixed(N, v); + std::copy(v.data(), v.data() + N, s.data()); + } +}; + +/** + * codec_traits for Avro arrays. + */ +template <typename T> struct codec_traits<std::vector<T> > { + /** + * Encodes a given value. + */ + static void encode(Encoder& e, const std::vector<T>& b) { + e.arrayStart(); + if (! b.empty()) { + e.setItemCount(b.size()); + for (typename std::vector<T>::const_iterator it = b.begin(); + it != b.end(); ++it) { + e.startItem(); + avro::encode(e, *it); + } + } + e.arrayEnd(); + } + + /** + * Decodes into a given value. + */ + static void decode(Decoder& d, std::vector<T>& s) { + s.clear(); + for (size_t n = d.arrayStart(); n != 0; n = d.arrayNext()) { + for (size_t i = 0; i < n; ++i) { + T t; + avro::decode(d, t); + s.push_back(std::move(t)); + } + } + } +}; + +typedef codec_traits<std::vector<bool>::const_reference> bool_codec_traits; + +template <> struct codec_traits<std::conditional<avro::is_not_defined<bool_codec_traits>::value, + std::vector<bool>::const_reference, void>::type> { + /** + * Encodes a given value. + */ + static void encode(Encoder& e, std::vector<bool>::const_reference b) { + e.encodeBool(b); + } +}; + +/** + * codec_traits for Avro maps. + */ +template <typename T> struct codec_traits<std::map<std::string, T> > { + /** + * Encodes a given value. + */ + static void encode(Encoder& e, const std::map<std::string, T>& b) { + e.mapStart(); + if (! b.empty()) { + e.setItemCount(b.size()); + for (typename std::map<std::string, T>::const_iterator + it = b.begin(); + it != b.end(); ++it) { + e.startItem(); + avro::encode(e, it->first); + avro::encode(e, it->second); + } + } + e.mapEnd(); + } + + /** + * Decodes into a given value. + */ + static void decode(Decoder& d, std::map<std::string, T>& s) { + s.clear(); + for (size_t n = d.mapStart(); n != 0; n = d.mapNext()) { + for (size_t i = 0; i < n; ++i) { + std::string k; + avro::decode(d, k); + T& t = s[std::move(k)]; + avro::decode(d, t); + } + } + } +}; + +/** +* codec_traits for Avro null. +*/ +template <> struct codec_traits<avro::null> { + /** + * Encodes a given value. + */ + static void encode(Encoder& e, const avro::null&) { + e.encodeNull(); + } + + /** + * Decodes into a given value. + */ + static void decode(Decoder& d, avro::null&) { + d.decodeNull(); + } +}; + + + +/** + * Generic encoder function that makes use of the codec_traits. + */ +template <typename T> +void encode(Encoder& e, const T& t) { + codec_traits<T>::encode(e, t); +} + +/** + * Generic decoder function that makes use of the codec_traits. + */ +template <typename T> +void decode(Decoder& d, T& t) { + codec_traits<T>::decode(d, t); +} + +} // namespace avro + +#endif // avro_Codec_hh__ + + + diff --git a/contrib/libs/apache/avro/api/Stream.hh b/contrib/libs/apache/avro/api/Stream.hh new file mode 100644 index 0000000000..508cb03980 --- /dev/null +++ b/contrib/libs/apache/avro/api/Stream.hh @@ -0,0 +1,483 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Stream_hh__ +#define avro_Stream_hh__ + +#include <memory> +#include <string.h> +#include <stdint.h> + +#include "boost/utility.hpp" + +#include "Config.hh" +#include "Exception.hh" + +namespace avro { + +/** + * A no-copy input stream. + */ +class AVRO_DECL InputStream : boost::noncopyable { +protected: + + /** + * An empty constuctor. + */ + InputStream() { } + +public: + /** + * Destructor. + */ + virtual ~InputStream() { } + + /** + * Returns some of available data. + * + * Returns true if some data is available, false if no more data is + * available or an error has occurred. + */ + virtual bool next(const uint8_t** data, size_t* len) = 0; + + /** + * "Returns" back some of the data to the stream. The returned + * data must be less than what was obtained in the last call to + * next(). + */ + virtual void backup(size_t len) = 0; + + /** + * Skips number of bytes specified by len. + */ + virtual void skip(size_t len) = 0; + + /** + * Returns the number of bytes read from this stream so far. + * All the bytes made available through next are considered + * to be used unless, retutned back using backup. + */ + virtual size_t byteCount() const = 0; +}; + +typedef std::unique_ptr<InputStream> InputStreamPtr; + +/** + * An InputStream which also supports seeking to a specific offset. + */ +class AVRO_DECL SeekableInputStream : public InputStream { +protected: + + /** + * An empty constuctor. + */ + SeekableInputStream() { } + +public: + /** + * Destructor. + */ + virtual ~SeekableInputStream() { } + + /** + * Seek to a specific position in the stream. This may invalidate pointers + * returned from next(). This will also reset byteCount() to the given + * position. + */ + virtual void seek(int64_t position) = 0; +}; + +typedef std::unique_ptr<SeekableInputStream> SeekableInputStreamPtr; + +/** + * A no-copy output stream. + */ +class AVRO_DECL OutputStream : boost::noncopyable { +protected: + + /** + * An empty constuctor. + */ + OutputStream() { } +public: + + /** + * Destructor. + */ + virtual ~OutputStream() { } + + /** + * Returns a buffer that can be written into. + * On successful return, data has the pointer to the buffer + * and len has the number of bytes available at data. + */ + virtual bool next(uint8_t** data, size_t* len) = 0; + + /** + * "Returns" back to the stream some of the buffer obtained + * from in the last call to next(). + */ + virtual void backup(size_t len) = 0; + + /** + * Number of bytes written so far into this stream. The whole buffer + * returned by next() is assumed to be written unless some of + * it was retutned using backup(). + */ + virtual uint64_t byteCount() const = 0; + + /** + * Flushes any data remaining in the buffer to the stream's underlying + * store, if any. + */ + virtual void flush() = 0; +}; + +typedef std::unique_ptr<OutputStream> OutputStreamPtr; + +/** + * Returns a new OutputStream, which grows in memory chunks of specified size. + */ +AVRO_DECL OutputStreamPtr memoryOutputStream(size_t chunkSize = 4 * 1024); + +/** + * Returns a new InputStream, with the data from the given byte array. + * It does not copy the data, the byte array should remain valid + * until the InputStream is used. + */ +AVRO_DECL InputStreamPtr memoryInputStream(const uint8_t* data, size_t len); + +/** + * Returns a new InputStream with the contents written into an + * outputstream. The output stream must have been returned by + * an earlier call to memoryOutputStream(). The contents for the new + * input stream are the snapshot of the outputstream. One can construct + * any number of memory input stream from a single memory output stream. + */ +AVRO_DECL InputStreamPtr memoryInputStream(const OutputStream& source); + +/** + * Returns the contents written so far into the output stream, which should + * be a memory output stream. That is it must have been returned by a pervious + * call to memoryOutputStream(). + */ +AVRO_DECL std::shared_ptr<std::vector<uint8_t> > snapshot(const OutputStream& source); + +/** + * Returns a new OutputStream whose contents would be stored in a file. + * Data is written in chunks of given buffer size. + * + * If there is a file with the given name, it is truncated and overwritten. + * If there is no file with the given name, it is created. + */ +AVRO_DECL OutputStreamPtr fileOutputStream(const char* filename, + size_t bufferSize = 8 * 1024); + +/** + * Returns a new InputStream whose contents come from the given file. + * Data is read in chunks of given buffer size. + */ +AVRO_DECL InputStreamPtr fileInputStream( + const char *filename, size_t bufferSize = 8 * 1024); +AVRO_DECL SeekableInputStreamPtr fileSeekableInputStream( + const char *filename, size_t bufferSize = 8 * 1024); + +/** + * Returns a new OutputStream whose contents will be sent to the given + * std::ostream. The std::ostream object should outlive the returned + * OutputStream. + */ +AVRO_DECL OutputStreamPtr ostreamOutputStream(std::ostream& os, + size_t bufferSize = 8 * 1024); + +/** + * Returns a new InputStream whose contents come from the given + * std::istream. The std::istream object should outlive the returned + * InputStream. + */ +AVRO_DECL InputStreamPtr istreamInputStream( + std::istream &in, size_t bufferSize = 8 * 1024); + +/** + * Returns a new InputStream whose contents come from the given + * std::istream. Use this instead of istreamInputStream if + * the istream does not support seekg (e.g. compressed streams). + * The returned InputStream would read off bytes instead of seeking. + * Of, course it has a performance penalty when reading instead of seeking; + * So, use this only when seekg does not work. + * The std::istream object should outlive the returned + * InputStream. + */ +AVRO_DECL InputStreamPtr nonSeekableIstreamInputStream( + std::istream& is, size_t bufferSize = 8 * 1024); + + +/** A convenience class for reading from an InputStream */ +struct StreamReader { + /** + * The underlying input stream. + */ + InputStream* in_; + + /** + * The next location to read from. + */ + const uint8_t* next_; + + /** + * One past the last valid location. + */ + const uint8_t* end_; + + /** + * Constructs an empty reader. + */ + StreamReader() : in_(0), next_(0), end_(0) { } + + /** + * Constructs a reader with the given underlying stream. + */ + StreamReader(InputStream& in) : in_(0), next_(0), end_(0) { reset(in); } + + /** + * Replaces the current input stream with the given one after backing up + * the original one if required. + */ + void reset(InputStream& is) { + if (in_ != 0 && end_ != next_) { + in_->backup(end_ - next_); + } + in_ = &is; + next_ = end_ = 0; + } + + /** + * Read just one byte from the underlying stream. If there are no + * more data, throws an exception. + */ + uint8_t read() { + if (next_ == end_) { + more(); + } + return *next_++; + } + + /** + * Reads the given number of bytes from the underlying stream. + * If there are not that many bytes, throws an exception. + */ + void readBytes(uint8_t* b, size_t n) { + while (n > 0) { + if (next_ == end_) { + more(); + } + size_t q = end_ - next_; + if (q > n) { + q = n; + } + ::memcpy(b, next_, q); + next_ += q; + b += q; + n -= q; + } + } + + /** + * Skips the given number of bytes. Of there are not so that many + * bytes, throws an exception. + */ + void skipBytes(size_t n) { + if (n > static_cast<size_t>(end_ - next_)) { + n -= end_ - next_; + next_ = end_; + in_->skip(n); + } else { + next_ += n; + } + } + + /** + * Get as many byes from the underlying stream as possible in a single + * chunk. + * \return true if some data could be obtained. False is no more + * data is available on the stream. + */ + bool fill() { + size_t n = 0; + while (in_->next(&next_, &n)) { + if (n != 0) { + end_ = next_ + n; + return true; + } + } + return false; + } + + /** + * Tries to get more data and if it cannot, throws an exception. + */ + void more() { + if (! fill()) { + throw Exception("EOF reached"); + } + } + + /** + * Returns true if and only if the end of stream is not reached. + */ + bool hasMore() { + return (next_ == end_) ? fill() : true; + } + + /** + * Returns unused bytes back to the underlying stream. + * If unRead is true the last byte read is also pushed back. + */ + void drain(bool unRead) { + if (unRead) { + --next_; + } + in_->backup(end_ - next_); + end_ = next_; + } +}; + +/** + * A convinience class to write data into an OutputStream. + */ +struct StreamWriter { + /** + * The underlying output stream for this writer. + */ + OutputStream* out_; + + /** + * The next location to write to. + */ + uint8_t* next_; + + /** + * One past the last location one can write to. + */ + uint8_t* end_; + + /** + * Constructs a writer with no underlying stream. + */ + StreamWriter() : out_(0), next_(0), end_(0) { } + + /** + * Constructs a new writer with the given underlying stream. + */ + StreamWriter(OutputStream& out) : out_(0), next_(0), end_(0) { reset(out); } + + /** + * Replaces the current underlying stream with a new one. + * If required, it backs up unused bytes in the previous stream. + */ + void reset(OutputStream& os) { + if (out_ != 0 && end_ != next_) { + out_->backup(end_ - next_); + } + out_ = &os; + next_ = end_; + } + + /** + * Writes a single byte. + */ + void write(uint8_t c) { + if (next_ == end_) { + more(); + } + *next_++ = c; + } + + /** + * Writes the specified number of bytes starting at \p b. + */ + void writeBytes(const uint8_t* b, size_t n) { + while (n > 0) { + if (next_ == end_) { + more(); + } + size_t q = end_ - next_; + if (q > n) { + q = n; + } + ::memcpy(next_, b, q); + next_ += q; + b += q; + n -= q; + } + } + + /** + * backs up upto the currently written data and flushes the + * underlying stream. + */ + void flush() { + if (next_ != end_) { + out_->backup(end_ - next_); + next_ = end_; + } + out_->flush(); + } + + /** + * Return the number of bytes written so far. For a meaningful + * result, call this after a flush(). + */ + int64_t byteCount() const { + return out_->byteCount(); + } + + /** + * Gets more space to write to. Throws an exception it cannot. + */ + void more() { + size_t n = 0; + while (out_->next(&next_, &n)) { + if (n != 0) { + end_ = next_ + n; + return; + } + } + throw Exception("EOF reached"); + } +}; + +/** + * A convenience function to copy all the contents of an input stream into + * an output stream. + */ +inline void copy(InputStream& in, OutputStream& out) +{ + const uint8_t *p = 0; + size_t n = 0; + StreamWriter w(out); + while (in.next(&p, &n)) { + w.writeBytes(p, n); + } + w.flush(); +} + +} // namespace avro +#endif + + diff --git a/contrib/libs/apache/avro/api/Types.hh b/contrib/libs/apache/avro/api/Types.hh new file mode 100644 index 0000000000..f42399e96b --- /dev/null +++ b/contrib/libs/apache/avro/api/Types.hh @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Types_hh__ +#define avro_Types_hh__ + +#include <iostream> + +#include "Config.hh" + +namespace avro { + +/** + * The "type" for the schema. + */ +enum Type { + + AVRO_STRING, /*!< String */ + AVRO_BYTES, /*!< Sequence of variable length bytes data */ + AVRO_INT, /*!< 32-bit integer */ + AVRO_LONG, /*!< 64-bit integer */ + AVRO_FLOAT, /*!< Floating point number */ + AVRO_DOUBLE, /*!< Double precision floating point number */ + AVRO_BOOL, /*!< Boolean value */ + AVRO_NULL, /*!< Null */ + + AVRO_RECORD, /*!< Record, a sequence of fields */ + AVRO_ENUM, /*!< Enumeration */ + AVRO_ARRAY, /*!< Homogeneous array of some specific type */ + AVRO_MAP, /*!< Homogeneous map from string to some specific type */ + AVRO_UNION, /*!< Union of one or more types */ + AVRO_FIXED, /*!< Fixed number of bytes */ + + AVRO_NUM_TYPES, /*!< Marker */ + + // The following is a pseudo-type used in implementation + + AVRO_SYMBOLIC = AVRO_NUM_TYPES, /*!< User internally to avoid circular references. */ + AVRO_UNKNOWN = -1 /*!< Used internally. */ + +}; + +/** + * Returns true if and only if the given type is a primitive. + * Primitive types are: string, bytes, int, long, float, double, boolean + * and null + */ +inline bool isPrimitive(Type t) { + return (t >= AVRO_STRING) && (t < AVRO_RECORD); +} + +/** + * Returns true if and only if the given type is a non primitive valid type. + * Primitive types are: string, bytes, int, long, float, double, boolean + * and null + */ +inline bool isCompound(Type t) { + return (t>= AVRO_RECORD) && (t < AVRO_NUM_TYPES); +} + +/** + * Returns true if and only if the given type is a valid avro type. + */ +inline bool isAvroType(Type t) { + return (t >= AVRO_STRING) && (t < AVRO_NUM_TYPES); +} + +/** + * Returns true if and only if the given type is within the valid range + * of enumeration. + */ +inline bool isAvroTypeOrPseudoType(Type t) { + return (t >= AVRO_STRING) && (t <= AVRO_NUM_TYPES); +} + +/** + * Converts the given type into a string. Useful for generating messages. + */ +AVRO_DECL const std::string& toString(Type type); + +/** + * Writes a string form of the given type into the given ostream. + */ +AVRO_DECL std::ostream &operator<< (std::ostream &os, avro::Type type); + +/// define a type to identify Null in template functions +struct AVRO_DECL Null { }; + +/** + * Writes schema for null \p null type to \p os. + * \param os The ostream to write to. + * \param null The value to be written. + */ +std::ostream& operator<< (std::ostream &os, const Null &null); + +} // namespace avro + + +#endif diff --git a/contrib/libs/apache/avro/api/ValidSchema.hh b/contrib/libs/apache/avro/api/ValidSchema.hh new file mode 100644 index 0000000000..7b0ec28bed --- /dev/null +++ b/contrib/libs/apache/avro/api/ValidSchema.hh @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_ValidSchema_hh__ +#define avro_ValidSchema_hh__ + +#include "Config.hh" +#include "Node.hh" + +namespace avro { + +class AVRO_DECL Schema; + +/// A ValidSchema is basically a non-mutable Schema that has passed some +/// minimum of sanity checks. Once validated, any Schema that is part of +/// this ValidSchema is considered locked, and cannot be modified (an attempt +/// to modify a locked Schema will throw). Also, as it is validated, any +/// recursive duplications of schemas are replaced with symbolic links to the +/// original. +/// +/// Once a Schema is converted to a valid schema it can be used in validating +/// parsers/serializers, converted to a json schema, etc. +/// + +class AVRO_DECL ValidSchema { +public: + explicit ValidSchema(const NodePtr &root); + explicit ValidSchema(const Schema &schema); + ValidSchema(); + + void setSchema(const Schema &schema); + + const NodePtr &root() const { + return root_; + } + + void toJson(std::ostream &os) const; + std::string toJson(bool prettyPrint = true) const; + + void toFlatList(std::ostream &os) const; + + protected: + NodePtr root_; + + private: + static std::string compactSchema(const std::string &schema); +}; + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/Validator.hh b/contrib/libs/apache/avro/api/Validator.hh new file mode 100644 index 0000000000..3f542d611a --- /dev/null +++ b/contrib/libs/apache/avro/api/Validator.hh @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Validating_hh__ +#define avro_Validating_hh__ + +#include <boost/noncopyable.hpp> +#include <vector> +#include <stdint.h> + +#include "Config.hh" +#include "Types.hh" +#include "ValidSchema.hh" + +namespace avro { + +class AVRO_DECL NullValidator : private boost::noncopyable +{ + public: + + explicit NullValidator(const ValidSchema &) {} + NullValidator() {} + + void setCount(int64_t) {} + + bool typeIsExpected(Type) const { + return true; + } + + Type nextTypeExpected() const { + return AVRO_UNKNOWN; + } + + int nextSizeExpected() const { + return 0; + } + + bool getCurrentRecordName(std::string &) const { + return true; + } + + bool getNextFieldName(std::string &) const { + return true; + } + + void checkTypeExpected(Type) { } + void checkFixedSizeExpected(int) { } + + +}; + +/// This class is used by both the ValidatingSerializer and ValidationParser +/// objects. It advances the parse tree (containing logic how to advance +/// through the various compound types, for example a record must advance +/// through all leaf nodes but a union only skips to one), and reports which +/// type is next. + +class AVRO_DECL Validator : private boost::noncopyable +{ + public: + + explicit Validator(const ValidSchema &schema); + + void setCount(int64_t val); + + bool typeIsExpected(Type type) const { + return (expectedTypesFlag_ & typeToFlag(type)) != 0; + } + + Type nextTypeExpected() const { + return nextType_; + } + + int nextSizeExpected() const; + + bool getCurrentRecordName(std::string &name) const; + bool getNextFieldName(std::string &name) const; + + void checkTypeExpected(Type type) { + if(! typeIsExpected(type)) { + throw Exception( + boost::format("Type %1% does not match schema %2%") + % type % nextType_ + ); + } + advance(); + } + + void checkFixedSizeExpected(int size) { + if( nextSizeExpected() != size) { + throw Exception( + boost::format("Wrong size for fixed, got %1%, expected %2%") + % size % nextSizeExpected() + ); + } + checkTypeExpected(AVRO_FIXED); + } + + private: + + typedef uint32_t flag_t; + + flag_t typeToFlag(Type type) const { + flag_t flag = (1L << type); + return flag; + } + + void setupOperation(const NodePtr &node); + + void setWaitingForCount(); + + void advance(); + void doAdvance(); + + void enumAdvance(); + bool countingSetup(); + void countingAdvance(); + void unionAdvance(); + void fixedAdvance(); + + void setupFlag(Type type); + + const ValidSchema schema_; + + Type nextType_; + flag_t expectedTypesFlag_; + bool compoundStarted_; + bool waitingForCount_; + int64_t count_; + + struct CompoundType { + explicit CompoundType(const NodePtr &n) : + node(n), pos(0) + {} + NodePtr node; ///< save the node + size_t pos; ///< track the leaf position to visit + }; + + std::vector<CompoundType> compoundStack_; + std::vector<size_t> counters_; + +}; + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/Writer.hh b/contrib/libs/apache/avro/api/Writer.hh new file mode 100644 index 0000000000..74b057ce65 --- /dev/null +++ b/contrib/libs/apache/avro/api/Writer.hh @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Writer_hh__ +#define avro_Writer_hh__ + +#include <array> +#include <boost/noncopyable.hpp> + +#include "Config.hh" +#include "buffer/Buffer.hh" +#include "Zigzag.hh" +#include "Types.hh" +#include "Validator.hh" + +namespace avro { + +/// Class for writing avro data to a stream. + +template<class ValidatorType> +class WriterImpl : private boost::noncopyable +{ + + public: + + WriterImpl() {} + + explicit WriterImpl(const ValidSchema &schema) : + validator_(schema) + {} + + void writeValue(const Null &) { + validator_.checkTypeExpected(AVRO_NULL); + } + + void writeValue(bool val) { + validator_.checkTypeExpected(AVRO_BOOL); + int8_t byte = (val != 0); + buffer_.writeTo(byte); + } + + void writeValue(int32_t val) { + validator_.checkTypeExpected(AVRO_INT); + std::array<uint8_t, 5> bytes; + size_t size = encodeInt32(val, bytes); + buffer_.writeTo(reinterpret_cast<const char *>(bytes.data()), size); + } + + void writeValue(int64_t val) { + validator_.checkTypeExpected(AVRO_LONG); + putLong(val); + } + + void writeValue(float val) { + validator_.checkTypeExpected(AVRO_FLOAT); + union { + float f; + int32_t i; + } v; + + v.f = val; + buffer_.writeTo(v.i); + } + + void writeValue(double val) { + validator_.checkTypeExpected(AVRO_DOUBLE); + union { + double d; + int64_t i; + } v; + + v.d = val; + buffer_.writeTo(v.i); + } + + void writeValue(const std::string &val) { + validator_.checkTypeExpected(AVRO_STRING); + putBytes(val.c_str(), val.size()); + } + + void writeBytes(const void *val, size_t size) { + validator_.checkTypeExpected(AVRO_BYTES); + putBytes(val, size); + } + + template <size_t N> + void writeFixed(const uint8_t (&val)[N]) { + validator_.checkFixedSizeExpected(N); + buffer_.writeTo(reinterpret_cast<const char *>(val), N); + } + + template <size_t N> + void writeFixed(const std::array<uint8_t, N> &val) { + validator_.checkFixedSizeExpected(val.size()); + buffer_.writeTo(reinterpret_cast<const char *>(val.data()), val.size()); + } + + void writeRecord() { + validator_.checkTypeExpected(AVRO_RECORD); + validator_.checkTypeExpected(AVRO_LONG); + validator_.setCount(1); + } + + void writeRecordEnd() { + validator_.checkTypeExpected(AVRO_RECORD); + validator_.checkTypeExpected(AVRO_LONG); + validator_.setCount(0); + } + + void writeArrayBlock(int64_t size) { + validator_.checkTypeExpected(AVRO_ARRAY); + writeCount(size); + } + + void writeArrayEnd() { + writeArrayBlock(0); + } + + void writeMapBlock(int64_t size) { + validator_.checkTypeExpected(AVRO_MAP); + writeCount(size); + } + + void writeMapEnd() { + writeMapBlock(0); + } + + void writeUnion(int64_t choice) { + validator_.checkTypeExpected(AVRO_UNION); + writeCount(choice); + } + + void writeEnum(int64_t choice) { + validator_.checkTypeExpected(AVRO_ENUM); + writeCount(choice); + } + + InputBuffer buffer() const { + return buffer_; + } + + private: + + void putLong(int64_t val) { + std::array<uint8_t, 10> bytes; + size_t size = encodeInt64(val, bytes); + buffer_.writeTo(reinterpret_cast<const char *>(bytes.data()), size); + } + + void putBytes(const void *val, size_t size) { + putLong(size); + buffer_.writeTo(reinterpret_cast<const char *>(val), size); + } + + void writeCount(int64_t count) { + validator_.checkTypeExpected(AVRO_LONG); + validator_.setCount(count); + putLong(count); + } + + ValidatorType validator_; + OutputBuffer buffer_; + +}; + +typedef WriterImpl<NullValidator> Writer; +typedef WriterImpl<Validator> ValidatingWriter; + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/Zigzag.hh b/contrib/libs/apache/avro/api/Zigzag.hh new file mode 100644 index 0000000000..d0259b8d50 --- /dev/null +++ b/contrib/libs/apache/avro/api/Zigzag.hh @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Encoding_hh__ +#define avro_Encoding_hh__ + +#include <stdint.h> +#include <array> +#include <cstddef> + +#include "Config.hh" +/// \file +/// Functions for encoding and decoding integers with zigzag compression + +namespace avro { + +AVRO_DECL uint64_t encodeZigzag64(int64_t input); +AVRO_DECL int64_t decodeZigzag64(uint64_t input); + +AVRO_DECL uint32_t encodeZigzag32(int32_t input); +AVRO_DECL int32_t decodeZigzag32(uint32_t input); + +AVRO_DECL size_t encodeInt32(int32_t input, std::array<uint8_t, 5> &output); +AVRO_DECL size_t encodeInt64(int64_t input, std::array<uint8_t, 10> &output); + +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/api/buffer/Buffer.hh b/contrib/libs/apache/avro/api/buffer/Buffer.hh new file mode 100644 index 0000000000..7d7aaf8679 --- /dev/null +++ b/contrib/libs/apache/avro/api/buffer/Buffer.hh @@ -0,0 +1,526 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_Buffer_hh__ +#define avro_Buffer_hh__ + +#ifndef _WIN32 +#include <sys/uio.h> +#endif +#include <vector> + +#include "../Config.hh" +#include "detail/BufferDetail.hh" +#include "detail/BufferDetailIterator.hh" + +/** + * \file Buffer.hh + * + * \brief Definitions for InputBuffer and OutputBuffer classes + * + **/ + +namespace avro { + +class OutputBuffer; +class InputBuffer; + + +/** + * The OutputBuffer (write-only buffer) + * + * Use cases for OutputBuffer + * + * - write message to buffer using ostream class or directly + * - append messages to headers + * - building up streams of messages via append + * - converting to read-only buffers for sending + * - extracting parts of the messages into read-only buffers + * + * -# ASIO access: + * - write to a buffer(s) by asio using iterator + * - convert to read buffer for deserializing + * + * OutputBuffer is assignable and copy-constructable. On copy or assignment, + * only a pointer is copied, so the two resulting copies are identical, so + * modifying one will modify both. + **/ + +class AVRO_DECL OutputBuffer +{ + + public: + + typedef detail::size_type size_type; + typedef detail::data_type data_type; + + /** + * The asio library expects a const_iterator (the const-ness refers to the + * fact that the underlying avro of buffers will not be modified, even + * though the data in those buffers is being modified). The iterator + * provides the list of addresses an operation can write to. + **/ + + typedef detail::OutputBufferIterator const_iterator; + + /** + * Default constructor. Will pre-allocate at least the requested size, but + * can grow larger on demand. + * + * Destructor uses the default, which resets a shared pointer, deleting the + * underlying data if no other copies of exist. + * + * Copy and assignment operators are not explicitly provided because the + * default ones work fine. The default makes only a shallow copy, so the + * copies will refer to the same memory. This is required by asio + * functions, which will implicitly make copies for asynchronous + * operations. Therefore, the user must be careful that if they create + * multiple copies of the same OutputBuffer, only one is being modified + * otherwise undefined behavior may occur. + * + **/ + + OutputBuffer(size_type reserveSize = 0) : + pimpl_(new detail::BufferImpl) + { + if(reserveSize) { + reserve(reserveSize); + } + } + + /** + * Reserve enough space for a wroteTo() operation. When using writeTo(), + * the buffer will grow dynamically as needed. But when using the iterator + * to write (followed by wroteTo()), data may only be written to the space + * available, so this ensures there is enough room in the buffer before + * the write operation. + **/ + + void reserve(size_type reserveSize) + { + pimpl_->reserveFreeSpace(reserveSize); + } + + /** + * Write a block of data to the buffer. The buffer size will automatically + * grow if the size is larger than what is currently free. + **/ + + size_type writeTo(const data_type *data, size_type size) { + return pimpl_->writeTo(data, size); + } + + /** + * Write a single value to the buffer. The buffer size will automatically + * grow if there is not room for the byte. The value must be a + * "fundamental" type, e.g. int, float, etc. (otherwise use the other + * writeTo tests). + **/ + + template<typename T> + void writeTo(T val) { + pimpl_->writeTo(val, std::is_fundamental<T>()); + } + + /** + * Update the state of the buffer after writing through the iterator + * interface. This function exists primarily for the boost:asio which + * writes directly to the buffer using its iterator. In this case, the + * internal state of the buffer does not reflect that the data was written + * This informs the buffer how much data was written. + * + * The buffer does not automatically resize in this case, the bytes written + * cannot exceed the amount of free space. Attempting to write more will + * throw a std::length_error exception. + **/ + + size_type wroteTo(size_type size) + { + int wrote = 0; + if(size) { + if(size > freeSpace()) { + throw std::length_error("Impossible to write more data than free space"); + } + wrote = pimpl_->wroteTo(size); + } + return wrote; + } + + /** + * Does the buffer have any data? + **/ + + bool empty() const { + return (pimpl_->size()==0); + } + + /** + * Returns the size of the buffer, in bytes. + */ + + size_type size() const { + return pimpl_->size(); + } + + /** + * Returns the current free space that is available to write to in the + * buffer, in bytes. This is not a strict limit in size, as writeTo() can + * automatically increase capacity if necessary. + **/ + + size_type freeSpace() const { + return pimpl_->freeSpace(); + } + + /** + * Appends the data in the argument to the end of this buffer. The + * argument can be either an InputBuffer or OutputBuffer. + * + **/ + + template <class BufferType> + void append(const BufferType &buf) { + // don't append an empty buffer + if(buf.size()) { + pimpl_->append(*(buf.pimpl_.get())); + } + } + + /** + * Return an iterator pointing to the first data chunk of this buffer + * that may be written to. + **/ + + const_iterator begin() const { + return const_iterator(pimpl_->beginWrite()); + } + + /** + * Return the end iterator for writing. + **/ + + const_iterator end() const { + return const_iterator(pimpl_->endWrite()); + } + + /** + * Discard any data in this buffer. + **/ + + void discardData() + { + pimpl_->discardData(); + } + + /** + * Discard the specified number of bytes from this data, starting at the beginning. + * Throws if the size is greater than the number of bytes. + **/ + + void discardData(size_t bytes) + { + if(bytes > 0) { + if(bytes < pimpl_->size()) { + pimpl_->discardData(bytes); + } + else if(bytes == pimpl_->size()) { + pimpl_->discardData(); + } + else { + throw std::out_of_range("trying to discard more data than exists"); + } + } + } + + /** + * Remove bytes from this buffer, starting from the beginning, and place + * them into a new buffer. Throws if the number of requested bytes exceeds + * the size of the buffer. Data and freeSpace in the buffer after bytes + * remains in this buffer. + **/ + + InputBuffer extractData(size_type bytes); + + /** + * Remove all bytes from this buffer, returning them in a new buffer. + * After removing data, some freeSpace may remain in this buffer. + **/ + + InputBuffer extractData(); + + /** + * Clone this buffer, creating a copy that contains the same data. + **/ + + OutputBuffer clone() const + { + detail::BufferImpl::SharedPtr newImpl(new detail::BufferImpl(*pimpl_)); + return OutputBuffer(newImpl); + } + + /** + * Add unmanaged data to the buffer. The buffer will not automatically + * free the data, but it will call the supplied function when the data is + * no longer referenced by the buffer (or copies of the buffer). + **/ + + void appendForeignData(const data_type *data, size_type size, const detail::free_func &func) { + pimpl_->appendForeignData(data, size, func); + } + + /** + * Returns the number of chunks that contain free space. + **/ + + int numChunks() const { + return pimpl_->numFreeChunks(); + } + + /** + * Returns the number of chunks that contain data + **/ + + int numDataChunks() const { + return pimpl_->numDataChunks(); + } + + private: + + friend class InputBuffer; + friend class BufferReader; + + explicit OutputBuffer(const detail::BufferImpl::SharedPtr &pimpl) : + pimpl_(pimpl) + { } + + detail::BufferImpl::SharedPtr pimpl_; ///< Must never be null. +}; + +/** + * The InputBuffer (read-only buffer) + * + * InputBuffer is an immutable buffer which that may be constructed from an + * OutputBuffer, or several of OutputBuffer's methods. Once the data is + * transfered to an InputBuffer it cannot be modified, only read (via + * BufferReader, istream, or its iterator). + * + * Assignments and copies are shallow copies. + * + * -# ASIO access: - iterate using const_iterator for sending messages + * + **/ + +class AVRO_DECL InputBuffer +{ + + public: + + typedef detail::size_type size_type; + typedef detail::data_type data_type; + + // needed for asio + typedef detail::InputBufferIterator const_iterator; + + /** + * Default InputBuffer creates an empty buffer. + * + * Copy/assignment functions use the default ones. They will do a shallow + * copy, and because InputBuffer is immutable, the copies will be + * identical. + * + * Destructor also uses the default, which resets a shared pointer, + * deleting the underlying data if no other copies of exist. + **/ + + InputBuffer() : + pimpl_(new detail::BufferImpl) + { } + + /** + * Construct an InputBuffer that contains the contents of an OutputBuffer. + * The two buffers will have the same contents, but this copy will be + * immutable, while the the OutputBuffer may still be written to. + * + * If you wish to move the data from the OutputBuffer to a new InputBuffer + * (leaving only free space in the OutputBuffer), + * OutputBuffer::extractData() will do this more efficiently. + * + * Implicit conversion is allowed. + **/ + + InputBuffer(const OutputBuffer &src) : + pimpl_(new detail::BufferImpl(*src.pimpl_)) + { } + + /** + * Does the buffer have any data? + **/ + + bool empty() const { + return (pimpl_->size() == 0); + } + + /** + * Returns the size of the buffer, in bytes. + **/ + + size_type size() const { + return pimpl_->size(); + } + + /** + * Return an iterator pointing to the first data chunk of this buffer + * that contains data. + **/ + + const_iterator begin() const { + return const_iterator(pimpl_->beginRead()); + } + + /** + * Return the end iterator. + **/ + + const_iterator end() const { + return const_iterator(pimpl_->endRead()); + } + + /** + * Returns the number of chunks containing data. + **/ + + int numChunks() const { + return pimpl_->numDataChunks(); + } + + + private: + + friend class OutputBuffer; // for append function + friend class istreambuf; + friend class BufferReader; + + explicit InputBuffer(const detail::BufferImpl::SharedPtr &pimpl) : + pimpl_(pimpl) + { } + + /** + * Class to indicate that a copy of a OutputBuffer to InputBuffer should be + * a shallow copy, used to enable reading of the contents of an + * OutputBuffer without need to convert it to InputBuffer using a deep + * copy. It is private and only used by BufferReader and istreambuf + * classes. + * + * Writing to an OutputBuffer while it is being read may lead to undefined + * behavior. + **/ + + class ShallowCopy {}; + + /** + * Make a shallow copy of an OutputBuffer in order to read it without + * causing conversion overhead. + **/ + InputBuffer(const OutputBuffer &src, const ShallowCopy &) : + pimpl_(src.pimpl_) + { } + + /** + * Make a shallow copy of an InputBuffer. The default copy constructor + * already provides shallow copy, this is just provided for generic + * algorithms that wish to treat InputBuffer and OutputBuffer in the same + * manner. + **/ + + InputBuffer(const InputBuffer &src, const ShallowCopy &) : + pimpl_(src.pimpl_) + { } + + + detail::BufferImpl::ConstSharedPtr pimpl_; ///< Must never be null. +}; + + +/* + * Implementations of some OutputBuffer functions are inlined here + * because InputBuffer definition was required before. + */ + +inline InputBuffer OutputBuffer::extractData() +{ + detail::BufferImpl::SharedPtr newImpl(new detail::BufferImpl); + if(pimpl_->size()) { + pimpl_->extractData(*newImpl); + } + return InputBuffer(newImpl); +} + +inline InputBuffer OutputBuffer::extractData(size_type bytes) +{ + if(bytes > pimpl_->size()) { + throw std::out_of_range("trying to extract more data than exists"); + } + + detail::BufferImpl::SharedPtr newImpl(new detail::BufferImpl); + if(bytes > 0) { + if(bytes < pimpl_->size()) { + pimpl_->extractData(*newImpl, bytes); + } + else { + pimpl_->extractData(*newImpl); + } + } + + return InputBuffer(newImpl); +} + +#ifndef _WIN32 +/** + * Create an array of iovec structures from the buffer. This utility is used + * to support writev and readv function calls. The caller should ensure the + * buffer object is not deleted while using the iovec vector. + * + * If the BufferType is an InputBuffer, the iovec will point to the data that + * already exists in the buffer, for reading. + * + * If the BufferType is an OutputBuffer, the iovec will point to the free + * space, which may be written to. Before writing, the caller should call + * OutputBuffer::reserve() to create enough room for the desired write (which + * can be verified by calling OutputBuffer::freeSpace()), and after writing, + * they MUST call OutputBuffer::wroteTo(), otherwise the buffer will not know + * the space is not free anymore. + * + **/ + +template<class BufferType> +inline void toIovec(BufferType &buf, std::vector<struct iovec> &iov) +{ + const int chunks = buf.numChunks(); + iov.resize(chunks); + typename BufferType::const_iterator iter = buf.begin(); + for (int i = 0; i < chunks; ++i) { + iov[i].iov_base = const_cast<typename BufferType::data_type *>(iter->data()); + iov[i].iov_len = iter->size(); + ++iter; + } +} +#endif + +} // namespace + +#endif diff --git a/contrib/libs/apache/avro/api/buffer/BufferReader.hh b/contrib/libs/apache/avro/api/buffer/BufferReader.hh new file mode 100644 index 0000000000..83b6b4b324 --- /dev/null +++ b/contrib/libs/apache/avro/api/buffer/BufferReader.hh @@ -0,0 +1,289 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_BufferReader_hh__ +#define avro_BufferReader_hh__ + +#include <type_traits> +#include "Buffer.hh" + +#ifdef min +#undef min +#endif +/** + * \file BufferReader.hh + * + * \brief Helper class for reading bytes from buffer in a streaming manner, + * without the overhead of istreams. + * + **/ + +namespace avro { + +/** + * Helper class for reading bytes from buffer without worrying about + * chunk boundaries. May read from an InputBuffer or OutputBuffer. + * + **/ +class AVRO_DECL BufferReader : private boost::noncopyable +{ + + public: + + typedef detail::data_type data_type; + typedef detail::size_type size_type; + + private: + + size_type chunkRemaining() const { + return iter_->dataSize() - chunkPos_; + } + + void incrementChunk(size_type howmuch) { + bytesRemaining_ -= howmuch; + chunkPos_ += howmuch; + if(chunkPos_ == iter_->dataSize()) { + chunkPos_ = 0; + ++iter_; + } + } + + void rewind() { + iter_ = bufferImpl_->beginRead(); + bytesRemaining_ = bytes_; + chunkPos_ = 0; + } + + const data_type *addr() const { + return iter_->tellReadPos() + chunkPos_; + } + + public: + + BufferReader(const InputBuffer &buf) : + bufferImpl_(buf.pimpl_), + iter_(bufferImpl_->beginRead()), + bytes_(bufferImpl_->size()), + bytesRemaining_(bytes_), + chunkPos_(0) + { } + + BufferReader(const OutputBuffer &buf) : + bufferImpl_(buf.pimpl_), + iter_(bufferImpl_->beginRead()), + bytes_(bufferImpl_->size()), + bytesRemaining_(bytes_), + chunkPos_(0) + { } + + /** + * How many bytes are still not read from this buffer. + **/ + + size_type bytesRemaining() const { + return bytesRemaining_; + } + + /** + * Read a block of data from the front of the buffer. + **/ + + size_type bytesRead() const { + return bytes_ - bytesRemaining_; + } + + /** + * Read a block of data from the buffer. + **/ + + size_type read(data_type *data, size_type size) { + + if(size > bytesRemaining_) { + size = bytesRemaining_; + } + size_type sizeToRead = size; + + while(sizeToRead) { + const size_type toRead = std::min(sizeToRead, chunkRemaining()); + memcpy(data, addr(), toRead); + sizeToRead -= toRead; + data += toRead; + incrementChunk(toRead); + } + + return size; + } + + /** + * Read a block of data from the buffer. + **/ + + bool read(std::string &str, size_type size) { + if(size > bytesRemaining_) { + return false; + } + + if(size <= chunkRemaining()) { + fastStringRead(str, size); + } + else { + slowStringRead(str, size); + } + + return true; + } + + + /** + * Read a single value from the buffer. The value must be a "fundamental" + * type, e.g. int, float, etc. (otherwise use the other writeTo tests). + * + **/ + + template<typename T> + bool read(T &val) { + return read(val, std::is_fundamental<T>()); + } + + /** + * Skips a block of data from the buffer. + **/ + + bool skip(size_type bytes) { + bool skipped = false; + if(bytes <= bytesRemaining_) { + doSkip(bytes); + skipped = true; + } + return skipped; + } + + /** + * Seek to a position in the buffer. + **/ + + bool seek(size_type pos) { + if(pos > bytes_) { + return false; + } + + size_type toSkip = pos; + size_type curPos = bytesRead(); + // if the seek position is ahead, we can use skip to get there + if(pos >= curPos) { + toSkip -= curPos; + } + // if the seek position is ahead of the start of the chunk we can back up to + // start of the chunk + else if(pos >= (curPos - chunkPos_)) { + curPos -= chunkPos_; + bytesRemaining_ += chunkPos_; + chunkPos_ = 0; + toSkip -= curPos; + } + else { + rewind(); + } + doSkip(toSkip); + return true; + } + + bool peek(char &val) { + bool ret = (bytesRemaining_ > 0); + if(ret) { + val = *(addr()); + } + return ret; + } + + InputBuffer copyData(size_type bytes) { + if(bytes > bytesRemaining_) { + // force no copy + bytes = 0; + } + detail::BufferImpl::SharedPtr newImpl(new detail::BufferImpl); + if(bytes) { + bufferImpl_->copyData(*newImpl, iter_, chunkPos_, bytes); + doSkip(bytes); + } + return InputBuffer(newImpl); + } + + private: + + void doSkip(size_type sizeToSkip) { + + while(sizeToSkip) { + const size_type toSkip = std::min(sizeToSkip, chunkRemaining()); + sizeToSkip -= toSkip; + incrementChunk(toSkip); + } + } + + template<typename T> + bool read(T &val, const std::true_type&) + { + if(sizeof(T) > bytesRemaining_) { + return false; + } + + if (sizeof(T) <= chunkRemaining()) { + val = *(reinterpret_cast<const T*> (addr())); + incrementChunk(sizeof(T)); + } + else { + read(reinterpret_cast<data_type *>(&val), sizeof(T)); + } + return true; + } + + /// An uninstantiable function, that is if boost::is_fundamental check fails + template<typename T> + bool read(T &, const std::false_type&) + { + static_assert(sizeof(T) == 0, "Not a valid type to read"); + return false; + } + + void fastStringRead(std::string &str, size_type sizeToCopy) { + str.assign(addr(), sizeToCopy); + incrementChunk(sizeToCopy); + } + + void slowStringRead(std::string &str, size_type sizeToCopy) { + str.clear(); + str.reserve(sizeToCopy); + while(sizeToCopy) { + const size_type toCopy = std::min(sizeToCopy, chunkRemaining()); + str.append(addr(), toCopy); + sizeToCopy -= toCopy; + incrementChunk(toCopy); + } + } + + detail::BufferImpl::ConstSharedPtr bufferImpl_; + detail::BufferImpl::ChunkList::const_iterator iter_; + size_type bytes_; + size_type bytesRemaining_; + size_type chunkPos_; +}; + + +} // namespace + +#endif diff --git a/contrib/libs/apache/avro/api/buffer/detail/BufferDetail.hh b/contrib/libs/apache/avro/api/buffer/detail/BufferDetail.hh new file mode 100644 index 0000000000..29a2e00b4e --- /dev/null +++ b/contrib/libs/apache/avro/api/buffer/detail/BufferDetail.hh @@ -0,0 +1,555 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_BufferDetail_hh__ +#define avro_BufferDetail_hh__ + +#include <boost/shared_ptr.hpp> +#include <boost/shared_array.hpp> +#include <boost/static_assert.hpp> +#include <boost/function.hpp> +#include <boost/utility.hpp> +#ifdef HAVE_BOOST_ASIO +#include <boost/asio/buffer.hpp> +#endif +#include <exception> +#include <cassert> +#include <deque> + +/** + * \file BufferDetail.hh + * + * \brief The implementation details for the Buffer class. + * + **/ + +namespace avro { + +namespace detail { + +typedef char data_type; +typedef size_t size_type; +#ifdef HAVE_BOOST_ASIO +typedef boost::asio::const_buffer ConstAsioBuffer; +typedef boost::asio::mutable_buffer MutableAsioBuffer; +#endif + +/// The size in bytes for blocks backing buffer chunks. +const size_type kMinBlockSize = 4096; +const size_type kMaxBlockSize = 16384; +const size_type kDefaultBlockSize = kMinBlockSize; + +typedef boost::function<void(void)> free_func; + +/** + * Simple class to hold a functor that executes on delete + **/ +class CallOnDestroy { + public: + CallOnDestroy(const free_func &func) : func_(func) + { } + ~CallOnDestroy() { + if (func_) { + func_(); + } + } + private: + free_func func_; +}; + +/** + * \brief A chunk is the building block for buffers. + * + * A chunk is backed by a memory block, and internally it maintains information + * about which area of the block it may use, and the portion of this area that + * contains valid data. More than one chunk may share the same underlying + * block, but the areas should never overlap. Chunk holds a shared pointer to + * an array of bytes so that shared blocks are reference counted. + * + * When a chunk is copied, the copy shares the same underlying buffer, but the + * copy receives its own copies of the start/cursor/end pointers, so each copy + * can be manipulated independently. This allows different buffers to share + * the same non-overlapping parts of a chunk, or even overlapping parts of a + * chunk if the situation arises. + * + **/ + +class Chunk +{ + + public: + + typedef boost::shared_ptr<Chunk> SharedPtr; + + /// Default constructor, allocates a new underlying block for this chunk. + Chunk(size_type size) : + underlyingBlock_(new data_type[size]), + readPos_(underlyingBlock_.get()), + writePos_(readPos_), + endPos_(readPos_ + size) + { } + + /// Foreign buffer constructor, uses the supplied data for this chunk, and + /// only for reading. + Chunk(const data_type *data, size_type size, const free_func &func) : + callOnDestroy_(new CallOnDestroy(func)), + readPos_(const_cast<data_type *>(data)), + writePos_(readPos_ + size), + endPos_(writePos_) + { } + + private: + // reference counted object will call a functor when it's destroyed + boost::shared_ptr<CallOnDestroy> callOnDestroy_; + + public: + + /// Remove readable bytes from the front of the chunk by advancing the + /// chunk start position. + void truncateFront(size_type howMuch) { + readPos_ += howMuch; + assert(readPos_ <= writePos_); + } + + /// Remove readable bytes from the back of the chunk by moving the + /// chunk cursor position. + void truncateBack(size_type howMuch) { + writePos_ -= howMuch; + assert(readPos_ <= writePos_); + } + + /// Tell the position the next byte may be written to. + data_type *tellWritePos() const { + return writePos_; + } + + /// Tell the position of the first byte containing valid data. + const data_type *tellReadPos() const { + return readPos_; + } + + /// After a write operation, increment the write position. + void incrementCursor(size_type howMuch) { + writePos_ += howMuch; + assert(writePos_ <= endPos_); + } + + /// Tell how many bytes of data were written to this chunk. + size_type dataSize() const { + return (writePos_ - readPos_); + } + + /// Tell how many bytes this chunk has available to write to. + size_type freeSize() const { + return (endPos_ - writePos_); + } + + /// Tell how many bytes of data this chunk can hold (used and free). + size_type capacity() const { + return (endPos_ - readPos_); + } + + private: + + friend bool operator==(const Chunk &lhs, const Chunk &rhs); + friend bool operator!=(const Chunk &lhs, const Chunk &rhs); + + // more than one buffer can share an underlying block, so use SharedPtr + boost::shared_array<data_type> underlyingBlock_; + + data_type *readPos_; ///< The first readable byte in the block + data_type *writePos_; ///< The end of written data and start of free space + data_type *endPos_; ///< Marks the end of the usable block area +}; + +/** + * Compare underlying buffers and return true if they are equal + **/ +inline bool operator==(const Chunk &lhs, const Chunk &rhs) { + return lhs.underlyingBlock_ == rhs.underlyingBlock_; +} + +/** + * Compare underlying buffers and return true if they are unequal + **/ +inline bool operator!=(const Chunk &lhs, const Chunk &rhs) { + return lhs.underlyingBlock_ != rhs.underlyingBlock_; +} + + +/** + * \brief Implementation details for Buffer class + * + * Internally, BufferImpl keeps two lists of chunks, one list consists entirely of + * chunks containing data, and one list which contains chunks with free space. + * + * + */ + +class BufferImpl : boost::noncopyable +{ + + /// Add a new chunk to the list of chunks for this buffer, growing the + /// buffer by the default block size. + void allocChunkChecked(size_type size = kDefaultBlockSize) + { + writeChunks_.push_back(Chunk(size)); + freeSpace_ += writeChunks_.back().freeSize(); + } + + /// Add a new chunk to the list of chunks for this buffer, growing the + /// buffer by the requested size, but within the range of a minimum and + /// maximum. + void allocChunk(size_type size) + { + if(size < kMinBlockSize) { + size = kMinBlockSize; + } + else if (size > kMaxBlockSize) { + size = kMaxBlockSize; + } + allocChunkChecked(size); + } + + /// Update the state of the chunks after a write operation. This function + /// ensures the chunk states are consistent with the write. + void postWrite(size_type size) + { + + // precondition to this function is that the writeChunk_.front() + // contains the data that was just written, so make sure writeChunks_ + // is not empty: + + assert(size <= freeSpace_ && !writeChunks_.empty()); + + // This is probably the one tricky part of BufferImpl. The data that + // was written now exists in writeChunks_.front(). Now we must make + // sure that same data exists in readChunks_.back(). + // + // There are two cases: + // + // 1. readChunks_.last() and writeChunk_.front() refer to the same + // underlying block, in which case they both just need their cursor + // updated to reflect the new state. + // + // 2. readChunk_.last() is not the same block as writeChunks_.front(), + // in which case it should be, since the writeChunk.front() contains + // the next bit of data that will be appended to readChunks_, and + // therefore needs to be copied there so we can proceed with updating + // their state. + // + + // if readChunks_ is not the same as writeChunks_.front(), make a copy + // of it there + + if(readChunks_.empty() || (readChunks_.back() != writeChunks_.front())) { + const Chunk &curChunk = writeChunks_.front(); + readChunks_.push_back(curChunk); + + // Any data that existed in the write chunk previously doesn't + // belong to this buffer (otherwise it would have already been + // added to the readChunk_ list). Here, adjust the start of the + // readChunk to begin after any data already existing in curChunk + + readChunks_.back().truncateFront( curChunk.dataSize()); + } + + assert(readChunks_.back().freeSize() == writeChunks_.front().freeSize()); + + // update the states of both readChunks_ and writeChunks_ to indicate that they are + // holding the new data + + readChunks_.back().incrementCursor(size); + writeChunks_.front().incrementCursor(size); + size_ += size; + freeSpace_ -= size; + + // if there is no more free space in writeChunks_, the next write cannot use + // it, so dispose of it now + + if(writeChunks_.front().freeSize() == 0) { + writeChunks_.pop_front(); + } + } + + public: + + typedef std::deque<Chunk> ChunkList; + typedef boost::shared_ptr<BufferImpl> SharedPtr; + typedef boost::shared_ptr<const BufferImpl> ConstSharedPtr; + + /// Default constructor, creates a buffer without any chunks + BufferImpl() : + freeSpace_(0), + size_(0) + { } + + /// Copy constructor, gets a copy of all the chunks with data. + explicit BufferImpl(const BufferImpl &src) : + readChunks_(src.readChunks_), + freeSpace_(0), + size_(src.size_) + { } + + /// Amount of data held in this buffer. + size_type size() const { + return size_; + } + + /// Capacity that may be written before the buffer must allocate more memory. + size_type freeSpace() const { + return freeSpace_; + } + + /// Add enough free chunks to make the reservation size available. + /// Actual amount may be more (rounded up to next chunk). + void reserveFreeSpace(size_type reserveSize) { + while(freeSpace_ < reserveSize) { + allocChunk(reserveSize - freeSpace_); + } + } + + /// Return the chunk avro's begin iterator for reading. + ChunkList::const_iterator beginRead() const { + return readChunks_.begin(); + } + + /// Return the chunk avro's end iterator for reading. + ChunkList::const_iterator endRead() const { + return readChunks_.end(); + } + + /// Return the chunk avro's begin iterator for writing. + ChunkList::const_iterator beginWrite() const { + return writeChunks_.begin(); + } + + /// Return the chunk avro's end iterator for writing. + ChunkList::const_iterator endWrite() const { + return writeChunks_.end(); + } + + /// Write a single value to buffer, add a new chunk if necessary. + template<typename T> + void writeTo(T val, const std::true_type&) + { + if(freeSpace_ && (sizeof(T) <= writeChunks_.front().freeSize())) { + // fast path, there's enough room in the writeable chunk to just + // straight out copy it + *(reinterpret_cast <T*> ( writeChunks_.front().tellWritePos()) ) = val; + postWrite(sizeof(T)); + } + else { + // need to fixup chunks first, so use the regular memcpy + // writeTo method + writeTo(reinterpret_cast<data_type*>(&val), sizeof(T)); + } + } + + /// An uninstantiable function, this is if boost::is_fundamental check fails, + /// and will compile-time assert. + template<typename T> + void writeTo(T, const std::false_type&) + { + BOOST_STATIC_ASSERT(sizeof(T)==0); + } + + /// Write a block of data to the buffer, adding new chunks if necessary. + size_type writeTo(const data_type *data, size_type size) + { + size_type bytesLeft = size; + while(bytesLeft) { + + if(freeSpace_ == 0) { + allocChunkChecked(); + } + + Chunk &chunk = writeChunks_.front(); + size_type toCopy = std::min<size_type>(chunk.freeSize(), bytesLeft); + assert(toCopy); + memcpy(chunk.tellWritePos(), data, toCopy); + postWrite(toCopy); + data += toCopy; + bytesLeft -= toCopy; + } + return size; + } + + /// Update internal status of chunks after data is written using iterator. + size_type wroteTo(size_type size) + { + assert(size <= freeSpace_); + size_type bytesLeft = size; + while (bytesLeft) { + + Chunk &chunk = writeChunks_.front(); + size_type wrote = std::min<size_type>(chunk.freeSize(), bytesLeft); + assert(wrote); + postWrite(wrote); + bytesLeft -= wrote; + } + return size; + } + + /// Append the chunks that have data in src to this buffer + void append(const BufferImpl &src) { + std::copy(src.readChunks_.begin(), src.readChunks_.end(), std::back_inserter(readChunks_)); + size_ += src.size_; + } + + /// Remove all the chunks that contain data from this buffer. + void discardData() { + readChunks_.clear(); + size_ = 0; + } + + /// Remove the specified amount of data from the chunks, starting at the front. + void discardData(size_type bytes) + { + assert(bytes && bytes <= size_); + + size_type bytesToDiscard = bytes; + while( bytesToDiscard ) { + + size_t currentSize = readChunks_.front().dataSize(); + + // see if entire chunk is discarded + if(currentSize <= bytesToDiscard) { + readChunks_.pop_front(); + bytesToDiscard -= currentSize; + } + else { + readChunks_.front().truncateFront(bytesToDiscard); + bytesToDiscard = 0; + } + } + + size_ -= bytes; + } + + /// Remove the specified amount of data from the chunks, moving the + /// data to dest's chunks + void extractData(BufferImpl &dest, size_type bytes) + { + assert(bytes && bytes <= size_); + + size_type bytesToExtract = bytes; + while( bytesToExtract ) { + + size_t currentSize = readChunks_.front().dataSize(); + dest.readChunks_.push_back(readChunks_.front()); + + // see if entire chunk was extracted + if(currentSize <= bytesToExtract) { + readChunks_.pop_front(); + bytesToExtract -= currentSize; + } + else { + readChunks_.front().truncateFront(bytesToExtract); + size_t excess = currentSize - bytesToExtract; + dest.readChunks_.back().truncateBack(excess); + bytesToExtract = 0; + } + } + + size_ -= bytes; + dest.size_ += bytes; + } + + /// Move data from this to the destination, leaving this buffer without data + void extractData(BufferImpl &dest) + { + assert(dest.readChunks_.empty()); + dest.readChunks_.swap(readChunks_); + dest.size_ = size_; + size_ = 0; + } + + /// Copy data to a different buffer by copying the chunks. It's + /// a bit like extract, but without modifying the source buffer. + void copyData(BufferImpl &dest, + ChunkList::const_iterator iter, + size_type offset, + size_type bytes) const + { + // now we are positioned to start the copying, copy as many + // chunks as we need, the first chunk may have a non-zero offset + // if the data to copy is not at the start of the chunk + size_type copied = 0; + while(copied < bytes) { + + dest.readChunks_.push_back(*iter); + + // offset only applies in the first chunk, + // all subsequent chunks are copied from the start + dest.readChunks_.back().truncateFront(offset); + offset = 0; + + copied += dest.readChunks_.back().dataSize(); + ++iter; + } + + // if the last chunk copied has more bytes than we need, truncate it + size_type excess = copied - bytes; + dest.readChunks_.back().truncateBack(excess); + + dest.size_ += bytes; + } + + /// The number of chunks containing data. Used for debugging. + int numDataChunks() const { + return readChunks_.size(); + } + + /// The number of chunks containing free space (note that an entire chunk + /// may not be free). Used for debugging. + int numFreeChunks() const { + return writeChunks_.size(); + } + + /// Add unmanaged data to the buffer. The buffer will not automatically + /// free the data, but it will call the supplied function when the data is + /// no longer referenced by the buffer (or copies of the buffer). + void appendForeignData(const data_type *data, size_type size, const free_func &func) { + readChunks_.push_back(Chunk(data, size, func)); + size_ += size; + } + + private: + + /// Assignment not allowed + BufferImpl& operator=(const BufferImpl &src); + /* { + readChunks_.assign(src.readChunks_.begin(), src.readChunks_.end()); + size_ = src.size(); + return *this; + } */ + + ChunkList readChunks_; ///< chunks of this buffer containing data + ChunkList writeChunks_; ///< chunks of this buffer containing free space + + size_type freeSpace_; ///< capacity of buffer before allocation required + size_type size_; ///< amount of data in buffer + +}; + +} // detail namespace + +} // namespace + +#endif diff --git a/contrib/libs/apache/avro/api/buffer/detail/BufferDetailIterator.hh b/contrib/libs/apache/avro/api/buffer/detail/BufferDetailIterator.hh new file mode 100644 index 0000000000..c05f219a64 --- /dev/null +++ b/contrib/libs/apache/avro/api/buffer/detail/BufferDetailIterator.hh @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_BufferDetailIterator_hh__ +#define avro_BufferDetailIterator_hh__ + +#include "BufferDetail.hh" + +/** + * \file BufferDetailIterator.hh + * + * \brief The implementation details for the Buffer iterators. + **/ + +namespace avro { + +namespace detail { + +/** + * \brief Implements conversion from a chunk to asio::const_buffer + * + * Iterators for an InputBuffer will iterate over the avro of chunks, so + * internally they contain an iterator. But the iterator needs to be + * convertable to an asio buffer for use in boost::asio functions. This class + * wraps the iterator with a cast operator to do this conversion. + **/ + +struct InputIteratorHelper +{ + /// Construct a helper with an unnassigned iterator. + InputIteratorHelper() : + iter_() + {} + + /// Construct a helper with an iterator. + InputIteratorHelper(const BufferImpl::ChunkList::const_iterator &iter) : + iter_(iter) + {} + + /// The location of valid data in this chunk. + const data_type *data() const { + return iter_->tellReadPos(); + } + + /// The size of valid data in this chunk. + size_type size() const { + return iter_->dataSize(); + } + + /// Conversion operator. It doesn't check for null, because the only + /// the only time the chunk should be null is when it's the iterator + /// end(), which should never be dereferenced anyway. +#ifdef HAVE_BOOST_ASIO + operator ConstAsioBuffer() const { + return ConstAsioBuffer(data(), size()); + } +#endif + + BufferImpl::ChunkList::const_iterator iter_; ///< the current iterator +}; + +/** + * \brief Implements conversion from a chunk to asio::buffer + * + * Iterators for an OutputBuffer will iterate over the avro of chunks, so + * internally they contain an iterator. But the iterator needs to be + * convertable to an asio buffer for use in boost::asio functions. This class + * wraps the iterator with a cast operator to do this conversion. + */ + +struct OutputIteratorHelper +{ + /// Construct a helper with an unnassigned iterator. + OutputIteratorHelper() : + iter_() + {} + + /// Construct a helper with an iterator. + OutputIteratorHelper(const BufferImpl::ChunkList::const_iterator &iter) : + iter_(iter) + {} + + /// The location of the first writable byte in this chunk. + data_type *data() const { + return iter_->tellWritePos(); + } + + /// The size of area that can be written in this chunk. + size_type size() const { + return iter_->freeSize(); + } + + /// Conversion operator. It doesn't check for null, because the only + /// the only time the chunk should be null is when it's the iterator + /// end(), which should never be dereferenced anyway. +#ifdef HAVE_BOOST_ASIO + operator MutableAsioBuffer() const { + return MutableAsioBuffer(data(), size()); + } +#endif + + BufferImpl::ChunkList::const_iterator iter_; ///< the current iterator +}; + +/** + * \brief Implements the iterator for Buffer, that iterates through the + * buffer's chunks. + **/ + +template<typename Helper> +class BufferIterator +{ + + public: + + typedef BufferIterator<Helper> this_type; + + /** + * @name Typedefs + * + * STL iterators define the following declarations. According to + * boost::asio documentation, the library expects the iterator to be + * bidirectional, however this implements only the forward iterator type. + * So far this has not created any problems with asio, but may change if + * future versions of the asio require it. + **/ + + //@{ + typedef std::forward_iterator_tag iterator_category; // this is a lie to appease asio + typedef Helper value_type; + typedef std::ptrdiff_t difference_type; + typedef value_type* pointer; + typedef value_type& reference; + //@} + + /// Construct an unitialized iterator. + BufferIterator() : + helper_() + { } + + /* The default implementations are good here + /// Copy constructor. + BufferIterator(const BufferIterator &src) : + helper_(src.helper_) + { } + /// Assignment. + this_type& operator= (const this_type &rhs) { + helper_ = rhs.helper_; + return *this; + } + */ + + /// Construct iterator at the position in the buffer's chunk list. + explicit BufferIterator(BufferImpl::ChunkList::const_iterator iter) : + helper_(iter) + { } + + /// Dereference iterator, returns InputIteratorHelper or OutputIteratorHelper wrapper. + reference operator *() { + return helper_; + } + + /// Dereference iterator, returns const InputIteratorHelper or OutputIteratorHelper wrapper. + const value_type &operator *() const { + return helper_; + } + + /// Dereference iterator, returns InputIteratorHelper or OutputIteratorHelper wrapper. + pointer operator->() { + return &helper_; + } + + /// Dereference iterator, returns const InputIteratorHelper or OutputIteratorHelper wrapper. + const value_type *operator->() const { + return &helper_; + } + + /// Increment to next chunk in list, or to end() iterator. + this_type& operator++() + { + ++helper_.iter_; + return *this; + } + + /// Increment to next chunk in list, or to end() iterator. + this_type operator++(int) + { + this_type ret = *this; + ++helper_.iter_; + return ret; + } + + /// True if iterators point to same chunks. + bool operator==(const this_type &rhs) const { + return (helper_.iter_ == rhs.helper_.iter_); + } + + /// True if iterators point to different chunks. + bool operator!=(const this_type &rhs) const { + return (helper_.iter_ != rhs.helper_.iter_); + } + + private: + + Helper helper_; +}; + +typedef BufferIterator<InputIteratorHelper> InputBufferIterator; +typedef BufferIterator<OutputIteratorHelper> OutputBufferIterator; + +} // detail namespace + +} // namespace + +#endif |