diff options
author | thegeorg <thegeorg@yandex-team.ru> | 2022-02-10 16:45:08 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:08 +0300 |
commit | 4e839db24a3bbc9f1c610c43d6faaaa99824dcca (patch) | |
tree | 506dac10f5df94fab310584ee51b24fc5a081c22 /contrib/libs/apache/avro/impl/parsing | |
parent | 2d37894b1b037cf24231090eda8589bbb44fb6fc (diff) | |
download | ydb-4e839db24a3bbc9f1c610c43d6faaaa99824dcca.tar.gz |
Restoring authorship annotation for <thegeorg@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/apache/avro/impl/parsing')
-rw-r--r-- | contrib/libs/apache/avro/impl/parsing/JsonCodec.cc | 1436 | ||||
-rw-r--r-- | contrib/libs/apache/avro/impl/parsing/ResolvingDecoder.cc | 1480 | ||||
-rw-r--r-- | contrib/libs/apache/avro/impl/parsing/Symbol.cc | 222 | ||||
-rw-r--r-- | contrib/libs/apache/avro/impl/parsing/Symbol.hh | 1708 | ||||
-rw-r--r-- | contrib/libs/apache/avro/impl/parsing/ValidatingCodec.cc | 1182 | ||||
-rw-r--r-- | contrib/libs/apache/avro/impl/parsing/ValidatingCodec.hh | 102 |
6 files changed, 3065 insertions, 3065 deletions
diff --git a/contrib/libs/apache/avro/impl/parsing/JsonCodec.cc b/contrib/libs/apache/avro/impl/parsing/JsonCodec.cc index 8bca2984ae..73271fca55 100644 --- a/contrib/libs/apache/avro/impl/parsing/JsonCodec.cc +++ b/contrib/libs/apache/avro/impl/parsing/JsonCodec.cc @@ -1,718 +1,718 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#define __STDC_LIMIT_MACROS - -#include <string> -#include <map> -#include <algorithm> -#include <ctype.h> -#include <memory> -#include <boost/math/special_functions/fpclassify.hpp> - -#include "ValidatingCodec.hh" -#include "Symbol.hh" -#include "ValidSchema.hh" -#include "Decoder.hh" -#include "Encoder.hh" -#include "NodeImpl.hh" - -#include "../json/JsonIO.hh" - -namespace avro { - -namespace parsing { - -using std::make_shared; - -using std::map; -using std::vector; -using std::string; -using std::reverse; -using std::ostringstream; -using std::istringstream; - -using avro::json::JsonParser; -using avro::json::JsonGenerator; -using avro::json::JsonNullFormatter; - -class JsonGrammarGenerator : public ValidatingGrammarGenerator { - ProductionPtr doGenerate(const NodePtr& n, - std::map<NodePtr, ProductionPtr> &m); -}; - -static std::string nameOf(const NodePtr& n) -{ - if (n->hasName()) { - return n->name(); - } - std::ostringstream oss; - oss << n->type(); - return oss.str(); -} - -ProductionPtr JsonGrammarGenerator::doGenerate(const NodePtr& n, - std::map<NodePtr, ProductionPtr> &m) { - switch (n->type()) { - case AVRO_NULL: - case AVRO_BOOL: - case AVRO_INT: - case AVRO_LONG: - case AVRO_FLOAT: - case AVRO_DOUBLE: - case AVRO_STRING: - case AVRO_BYTES: - case AVRO_FIXED: - case AVRO_ARRAY: - case AVRO_MAP: - case AVRO_SYMBOLIC: - return ValidatingGrammarGenerator::doGenerate(n, m); - case AVRO_RECORD: - { - ProductionPtr result = make_shared<Production>(); - - m.erase(n); - - size_t c = n->leaves(); - result->reserve(2 + 2 * c); - result->push_back(Symbol::recordStartSymbol()); - for (size_t i = 0; i < c; ++i) { - const NodePtr& leaf = n->leafAt(i); - ProductionPtr v = doGenerate(leaf, m); - result->push_back(Symbol::fieldSymbol(n->nameAt(i))); - copy(v->rbegin(), v->rend(), back_inserter(*result)); - } - result->push_back(Symbol::recordEndSymbol()); - reverse(result->begin(), result->end()); - - m[n] = result; - return make_shared<Production>(1, Symbol::indirect(result)); - } - case AVRO_ENUM: - { - vector<string> nn; - size_t c = n->names(); - nn.reserve(c); - for (size_t i = 0; i < c; ++i) { - nn.push_back(n->nameAt(i)); - } - ProductionPtr result = make_shared<Production>(); - result->push_back(Symbol::nameListSymbol(nn)); - result->push_back(Symbol::enumSymbol()); - m[n] = result; - return result; - } - case AVRO_UNION: - { - size_t c = n->leaves(); - - vector<ProductionPtr> vv; - vv.reserve(c); - - vector<string> names; - names.reserve(c); - - for (size_t i = 0; i < c; ++i) { - const NodePtr& nn = n->leafAt(i); - ProductionPtr v = doGenerate(nn, m); - if (nn->type() != AVRO_NULL) { - ProductionPtr v2 = make_shared<Production>(); - v2->push_back(Symbol::recordEndSymbol()); - copy(v->begin(), v->end(), back_inserter(*v2)); - v.swap(v2); - } - vv.push_back(v); - names.push_back(nameOf(nn)); - } - ProductionPtr result = make_shared<Production>(); - result->push_back(Symbol::alternative(vv)); - result->push_back(Symbol::nameListSymbol(names)); - result->push_back(Symbol::unionSymbol()); - return result; - } - default: - throw Exception("Unknown node type"); - } -} - -static void expectToken(JsonParser& in, JsonParser::Token tk) -{ - in.expectToken(tk); -} - -class JsonDecoderHandler { - JsonParser& in_; -public: - JsonDecoderHandler(JsonParser& p) : in_(p) { } - size_t handle(const Symbol& s) { - switch (s.kind()) { - case Symbol::sRecordStart: - expectToken(in_, JsonParser::tkObjectStart); - break; - case Symbol::sRecordEnd: - expectToken(in_, JsonParser::tkObjectEnd); - break; - case Symbol::sField: - expectToken(in_, JsonParser::tkString); - if (s.extra<string>() != in_.stringValue()) { - throw Exception("Incorrect field"); - } - break; - default: - break; - } - return 0; - } -}; - -template <typename P> -class JsonDecoder : public Decoder { - JsonParser in_; - JsonDecoderHandler handler_; - P parser_; - - void init(InputStream& is); - void decodeNull(); - bool decodeBool(); - int32_t decodeInt(); - int64_t decodeLong(); - float decodeFloat(); - double decodeDouble(); - void decodeString(string& value); - void skipString(); - void decodeBytes(vector<uint8_t>& value); - void skipBytes(); - void decodeFixed(size_t n, vector<uint8_t>& value); - void skipFixed(size_t n); - size_t decodeEnum(); - size_t arrayStart(); - size_t arrayNext(); - size_t skipArray(); - size_t mapStart(); - size_t mapNext(); - size_t skipMap(); - size_t decodeUnionIndex(); - - void expect(JsonParser::Token tk); - void skipComposite(); - void drain(); -public: - - JsonDecoder(const ValidSchema& s) : - handler_(in_), - parser_(JsonGrammarGenerator().generate(s), NULL, handler_) { } - -}; - -template <typename P> -void JsonDecoder<P>::init(InputStream& is) -{ - in_.init(is); - parser_.reset(); -} - -template <typename P> -void JsonDecoder<P>::expect(JsonParser::Token tk) -{ - expectToken(in_, tk); -} - -template <typename P> -void JsonDecoder<P>::decodeNull() -{ - parser_.advance(Symbol::sNull); - expect(JsonParser::tkNull); -} - -template <typename P> -bool JsonDecoder<P>::decodeBool() -{ - parser_.advance(Symbol::sBool); - expect(JsonParser::tkBool); - bool result = in_.boolValue(); - return result; -} - -template <typename P> -int32_t JsonDecoder<P>::decodeInt() -{ - parser_.advance(Symbol::sInt); - expect(JsonParser::tkLong); - int64_t result = in_.longValue(); - if (result < INT32_MIN || result > INT32_MAX) { - throw Exception(boost::format("Value out of range for Avro int: %1%") - % result); - } - return static_cast<int32_t>(result); -} - -template <typename P> -int64_t JsonDecoder<P>::decodeLong() -{ - parser_.advance(Symbol::sLong); - expect(JsonParser::tkLong); - int64_t result = in_.longValue(); - return result; -} - -template <typename P> -float JsonDecoder<P>::decodeFloat() -{ - parser_.advance(Symbol::sFloat); - expect(JsonParser::tkDouble); - double result = in_.doubleValue(); - return static_cast<float>(result); -} - -template <typename P> -double JsonDecoder<P>::decodeDouble() -{ - parser_.advance(Symbol::sDouble); - expect(JsonParser::tkDouble); - double result = in_.doubleValue(); - return result; -} - -template <typename P> -void JsonDecoder<P>::decodeString(string& value) -{ - parser_.advance(Symbol::sString); - expect(JsonParser::tkString); - value = in_.stringValue(); -} - -template <typename P> -void JsonDecoder<P>::skipString() -{ - parser_.advance(Symbol::sString); - expect(JsonParser::tkString); -} - -static vector<uint8_t> toBytes(const string& s) -{ - return vector<uint8_t>(s.begin(), s.end()); -} - -template <typename P> -void JsonDecoder<P>::decodeBytes(vector<uint8_t>& value ) -{ - parser_.advance(Symbol::sBytes); - expect(JsonParser::tkString); - value = toBytes(in_.bytesValue()); -} - -template <typename P> -void JsonDecoder<P>::skipBytes() -{ - parser_.advance(Symbol::sBytes); - expect(JsonParser::tkString); -} - -template <typename P> -void JsonDecoder<P>::decodeFixed(size_t n, vector<uint8_t>& value) -{ - parser_.advance(Symbol::sFixed); - parser_.assertSize(n); - expect(JsonParser::tkString); - value = toBytes(in_.bytesValue()); - if (value.size() != n) { - throw Exception("Incorrect value for fixed"); - } -} - -template <typename P> -void JsonDecoder<P>::skipFixed(size_t n) -{ - parser_.advance(Symbol::sFixed); - parser_.assertSize(n); - expect(JsonParser::tkString); - vector<uint8_t> result = toBytes(in_.bytesValue()); - if (result.size() != n) { - throw Exception("Incorrect value for fixed"); - } -} - -template <typename P> -size_t JsonDecoder<P>::decodeEnum() -{ - parser_.advance(Symbol::sEnum); - expect(JsonParser::tkString); - size_t result = parser_.indexForName(in_.stringValue()); - return result; -} - -template <typename P> -size_t JsonDecoder<P>::arrayStart() -{ - parser_.advance(Symbol::sArrayStart); - parser_.pushRepeatCount(0); - expect(JsonParser::tkArrayStart); - return arrayNext(); -} - -template <typename P> -size_t JsonDecoder<P>::arrayNext() -{ - parser_.processImplicitActions(); - if (in_.peek() == JsonParser::tkArrayEnd) { - in_.advance(); - parser_.popRepeater(); - parser_.advance(Symbol::sArrayEnd); - return 0; - } - parser_.nextRepeatCount(1); - return 1; -} - -template<typename P> -void JsonDecoder<P>::skipComposite() -{ - size_t level = 0; - for (; ;) { - switch (in_.advance()) { - case JsonParser::tkArrayStart: - case JsonParser::tkObjectStart: - ++level; - continue; - case JsonParser::tkArrayEnd: - case JsonParser::tkObjectEnd: - if (level == 0) { - return; - } - --level; - continue; - default: - continue; - } - } -} - -template<typename P> -void JsonDecoder<P>::drain() -{ - parser_.processImplicitActions(); - in_.drain(); -} - -template <typename P> -size_t JsonDecoder<P>::skipArray() -{ - parser_.advance(Symbol::sArrayStart); - parser_.pop(); - parser_.advance(Symbol::sArrayEnd); - expect(JsonParser::tkArrayStart); - skipComposite(); - return 0; -} - -template <typename P> -size_t JsonDecoder<P>::mapStart() -{ - parser_.advance(Symbol::sMapStart); - parser_.pushRepeatCount(0); - expect(JsonParser::tkObjectStart); - return mapNext(); -} - -template <typename P> -size_t JsonDecoder<P>::mapNext() -{ - parser_.processImplicitActions(); - if (in_.peek() == JsonParser::tkObjectEnd) { - in_.advance(); - parser_.popRepeater(); - parser_.advance(Symbol::sMapEnd); - return 0; - } - parser_.nextRepeatCount(1); - return 1; -} - -template <typename P> -size_t JsonDecoder<P>::skipMap() -{ - parser_.advance(Symbol::sMapStart); - parser_.pop(); - parser_.advance(Symbol::sMapEnd); - expect(JsonParser::tkObjectStart); - skipComposite(); - return 0; -} - -template <typename P> -size_t JsonDecoder<P>::decodeUnionIndex() -{ - parser_.advance(Symbol::sUnion); - - size_t result; - if (in_.peek() == JsonParser::tkNull) { - result = parser_.indexForName("null"); - } else { - expect(JsonParser::tkObjectStart); - expect(JsonParser::tkString); - result = parser_.indexForName(in_.stringValue()); - } - parser_.selectBranch(result); - return result; -} - -template<typename F = JsonNullFormatter> -class JsonHandler { - JsonGenerator<F>& generator_; -public: - JsonHandler(JsonGenerator<F>& g) : generator_(g) { } - size_t handle(const Symbol& s) { - switch (s.kind()) { - case Symbol::sRecordStart: - generator_.objectStart(); - break; - case Symbol::sRecordEnd: - generator_.objectEnd(); - break; - case Symbol::sField: - generator_.encodeString(s.extra<string>()); - break; - default: - break; - } - return 0; - } -}; - -template <typename P, typename F = JsonNullFormatter> -class JsonEncoder : public Encoder { - JsonGenerator<F> out_; - JsonHandler<F> handler_; - P parser_; - - void init(OutputStream& os); - void flush(); - int64_t byteCount() const; - void encodeNull(); - void encodeBool(bool b); - void encodeInt(int32_t i); - void encodeLong(int64_t l); - void encodeFloat(float f); - void encodeDouble(double d); - void encodeString(const std::string& s); - void encodeBytes(const uint8_t *bytes, size_t len); - void encodeFixed(const uint8_t *bytes, size_t len); - void encodeEnum(size_t e); - void arrayStart(); - void arrayEnd(); - void mapStart(); - void mapEnd(); - void setItemCount(size_t count); - void startItem(); - void encodeUnionIndex(size_t e); -public: - JsonEncoder(const ValidSchema& schema) : - handler_(out_), - parser_(JsonGrammarGenerator().generate(schema), NULL, handler_) { } -}; - -template<typename P, typename F> -void JsonEncoder<P, F>::init(OutputStream& os) -{ - out_.init(os); -} - -template<typename P, typename F> -void JsonEncoder<P, F>::flush() -{ - parser_.processImplicitActions(); - out_.flush(); -} - -template<typename P, typename F> -int64_t JsonEncoder<P, F>::byteCount() const -{ - return out_.byteCount(); -} - -template<typename P, typename F> -void JsonEncoder<P, F>::encodeNull() -{ - parser_.advance(Symbol::sNull); - out_.encodeNull(); -} - -template<typename P, typename F> -void JsonEncoder<P, F>::encodeBool(bool b) -{ - parser_.advance(Symbol::sBool); - out_.encodeBool(b); -} - -template<typename P, typename F> -void JsonEncoder<P, F>::encodeInt(int32_t i) -{ - parser_.advance(Symbol::sInt); - out_.encodeNumber(i); -} - -template<typename P, typename F> -void JsonEncoder<P, F>::encodeLong(int64_t l) -{ - parser_.advance(Symbol::sLong); - out_.encodeNumber(l); -} - -template<typename P, typename F> -void JsonEncoder<P, F>::encodeFloat(float f) -{ - parser_.advance(Symbol::sFloat); - if (f == std::numeric_limits<float>::infinity()) { - out_.encodeString("Infinity"); - } else if (f == -std::numeric_limits<float>::infinity()) { - out_.encodeString("-Infinity"); - } else if (boost::math::isnan(f)) { - out_.encodeString("NaN"); - } else { - out_.encodeNumber(f); - } -} - -template<typename P, typename F> -void JsonEncoder<P, F>::encodeDouble(double d) -{ - parser_.advance(Symbol::sDouble); - if (d == std::numeric_limits<double>::infinity()) { - out_.encodeString("Infinity"); - } else if (d == -std::numeric_limits<double>::infinity()) { - out_.encodeString("-Infinity"); - } else if (boost::math::isnan(d)) { - out_.encodeString("NaN"); - } else { - out_.encodeNumber(d); - } -} - -template<typename P, typename F> -void JsonEncoder<P, F>::encodeString(const std::string& s) -{ - parser_.advance(Symbol::sString); - out_.encodeString(s); -} - -template<typename P, typename F> -void JsonEncoder<P, F>::encodeBytes(const uint8_t *bytes, size_t len) -{ - parser_.advance(Symbol::sBytes); - out_.encodeBinary(bytes, len); -} - -template<typename P, typename F> -void JsonEncoder<P, F>::encodeFixed(const uint8_t *bytes, size_t len) -{ - parser_.advance(Symbol::sFixed); - parser_.assertSize(len); - out_.encodeBinary(bytes, len); -} - -template<typename P, typename F> -void JsonEncoder<P, F>::encodeEnum(size_t e) -{ - parser_.advance(Symbol::sEnum); - const string& s = parser_.nameForIndex(e); - out_.encodeString(s); -} - -template<typename P, typename F> -void JsonEncoder<P, F>::arrayStart() -{ - parser_.advance(Symbol::sArrayStart); - parser_.pushRepeatCount(0); - out_.arrayStart(); -} - -template<typename P, typename F> -void JsonEncoder<P, F>::arrayEnd() -{ - parser_.popRepeater(); - parser_.advance(Symbol::sArrayEnd); - out_.arrayEnd(); -} - -template<typename P, typename F> -void JsonEncoder<P, F>::mapStart() -{ - parser_.advance(Symbol::sMapStart); - parser_.pushRepeatCount(0); - out_.objectStart(); -} - -template<typename P, typename F> -void JsonEncoder<P, F>::mapEnd() -{ - parser_.popRepeater(); - parser_.advance(Symbol::sMapEnd); - out_.objectEnd(); -} - -template<typename P, typename F> -void JsonEncoder<P, F>::setItemCount(size_t count) -{ - parser_.nextRepeatCount(count); -} - -template<typename P, typename F> -void JsonEncoder<P, F>::startItem() -{ - parser_.processImplicitActions(); - if (parser_.top() != Symbol::sRepeater) { - throw Exception("startItem at not an item boundary"); - } -} - -template<typename P, typename F> -void JsonEncoder<P, F>::encodeUnionIndex(size_t e) -{ - parser_.advance(Symbol::sUnion); - - const std::string name = parser_.nameForIndex(e); - - if (name != "null") { - out_.objectStart(); - out_.encodeString(name); - } - parser_.selectBranch(e); -} - -} // namespace parsing - -DecoderPtr jsonDecoder(const ValidSchema& s) -{ - return std::make_shared<parsing::JsonDecoder< - parsing::SimpleParser<parsing::JsonDecoderHandler> > >(s); -} - -EncoderPtr jsonEncoder(const ValidSchema& schema) -{ - return std::make_shared<parsing::JsonEncoder< - parsing::SimpleParser<parsing::JsonHandler<avro::json::JsonNullFormatter> >, avro::json::JsonNullFormatter> >(schema); -} - -EncoderPtr jsonPrettyEncoder(const ValidSchema& schema) -{ - return std::make_shared<parsing::JsonEncoder< - parsing::SimpleParser<parsing::JsonHandler<avro::json::JsonPrettyFormatter> >, avro::json::JsonPrettyFormatter> >(schema); -} - -} // namespace avro - +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define __STDC_LIMIT_MACROS + +#include <string> +#include <map> +#include <algorithm> +#include <ctype.h> +#include <memory> +#include <boost/math/special_functions/fpclassify.hpp> + +#include "ValidatingCodec.hh" +#include "Symbol.hh" +#include "ValidSchema.hh" +#include "Decoder.hh" +#include "Encoder.hh" +#include "NodeImpl.hh" + +#include "../json/JsonIO.hh" + +namespace avro { + +namespace parsing { + +using std::make_shared; + +using std::map; +using std::vector; +using std::string; +using std::reverse; +using std::ostringstream; +using std::istringstream; + +using avro::json::JsonParser; +using avro::json::JsonGenerator; +using avro::json::JsonNullFormatter; + +class JsonGrammarGenerator : public ValidatingGrammarGenerator { + ProductionPtr doGenerate(const NodePtr& n, + std::map<NodePtr, ProductionPtr> &m); +}; + +static std::string nameOf(const NodePtr& n) +{ + if (n->hasName()) { + return n->name(); + } + std::ostringstream oss; + oss << n->type(); + return oss.str(); +} + +ProductionPtr JsonGrammarGenerator::doGenerate(const NodePtr& n, + std::map<NodePtr, ProductionPtr> &m) { + switch (n->type()) { + case AVRO_NULL: + case AVRO_BOOL: + case AVRO_INT: + case AVRO_LONG: + case AVRO_FLOAT: + case AVRO_DOUBLE: + case AVRO_STRING: + case AVRO_BYTES: + case AVRO_FIXED: + case AVRO_ARRAY: + case AVRO_MAP: + case AVRO_SYMBOLIC: + return ValidatingGrammarGenerator::doGenerate(n, m); + case AVRO_RECORD: + { + ProductionPtr result = make_shared<Production>(); + + m.erase(n); + + size_t c = n->leaves(); + result->reserve(2 + 2 * c); + result->push_back(Symbol::recordStartSymbol()); + for (size_t i = 0; i < c; ++i) { + const NodePtr& leaf = n->leafAt(i); + ProductionPtr v = doGenerate(leaf, m); + result->push_back(Symbol::fieldSymbol(n->nameAt(i))); + copy(v->rbegin(), v->rend(), back_inserter(*result)); + } + result->push_back(Symbol::recordEndSymbol()); + reverse(result->begin(), result->end()); + + m[n] = result; + return make_shared<Production>(1, Symbol::indirect(result)); + } + case AVRO_ENUM: + { + vector<string> nn; + size_t c = n->names(); + nn.reserve(c); + for (size_t i = 0; i < c; ++i) { + nn.push_back(n->nameAt(i)); + } + ProductionPtr result = make_shared<Production>(); + result->push_back(Symbol::nameListSymbol(nn)); + result->push_back(Symbol::enumSymbol()); + m[n] = result; + return result; + } + case AVRO_UNION: + { + size_t c = n->leaves(); + + vector<ProductionPtr> vv; + vv.reserve(c); + + vector<string> names; + names.reserve(c); + + for (size_t i = 0; i < c; ++i) { + const NodePtr& nn = n->leafAt(i); + ProductionPtr v = doGenerate(nn, m); + if (nn->type() != AVRO_NULL) { + ProductionPtr v2 = make_shared<Production>(); + v2->push_back(Symbol::recordEndSymbol()); + copy(v->begin(), v->end(), back_inserter(*v2)); + v.swap(v2); + } + vv.push_back(v); + names.push_back(nameOf(nn)); + } + ProductionPtr result = make_shared<Production>(); + result->push_back(Symbol::alternative(vv)); + result->push_back(Symbol::nameListSymbol(names)); + result->push_back(Symbol::unionSymbol()); + return result; + } + default: + throw Exception("Unknown node type"); + } +} + +static void expectToken(JsonParser& in, JsonParser::Token tk) +{ + in.expectToken(tk); +} + +class JsonDecoderHandler { + JsonParser& in_; +public: + JsonDecoderHandler(JsonParser& p) : in_(p) { } + size_t handle(const Symbol& s) { + switch (s.kind()) { + case Symbol::sRecordStart: + expectToken(in_, JsonParser::tkObjectStart); + break; + case Symbol::sRecordEnd: + expectToken(in_, JsonParser::tkObjectEnd); + break; + case Symbol::sField: + expectToken(in_, JsonParser::tkString); + if (s.extra<string>() != in_.stringValue()) { + throw Exception("Incorrect field"); + } + break; + default: + break; + } + return 0; + } +}; + +template <typename P> +class JsonDecoder : public Decoder { + JsonParser in_; + JsonDecoderHandler handler_; + P parser_; + + void init(InputStream& is); + void decodeNull(); + bool decodeBool(); + int32_t decodeInt(); + int64_t decodeLong(); + float decodeFloat(); + double decodeDouble(); + void decodeString(string& value); + void skipString(); + void decodeBytes(vector<uint8_t>& value); + void skipBytes(); + void decodeFixed(size_t n, vector<uint8_t>& value); + void skipFixed(size_t n); + size_t decodeEnum(); + size_t arrayStart(); + size_t arrayNext(); + size_t skipArray(); + size_t mapStart(); + size_t mapNext(); + size_t skipMap(); + size_t decodeUnionIndex(); + + void expect(JsonParser::Token tk); + void skipComposite(); + void drain(); +public: + + JsonDecoder(const ValidSchema& s) : + handler_(in_), + parser_(JsonGrammarGenerator().generate(s), NULL, handler_) { } + +}; + +template <typename P> +void JsonDecoder<P>::init(InputStream& is) +{ + in_.init(is); + parser_.reset(); +} + +template <typename P> +void JsonDecoder<P>::expect(JsonParser::Token tk) +{ + expectToken(in_, tk); +} + +template <typename P> +void JsonDecoder<P>::decodeNull() +{ + parser_.advance(Symbol::sNull); + expect(JsonParser::tkNull); +} + +template <typename P> +bool JsonDecoder<P>::decodeBool() +{ + parser_.advance(Symbol::sBool); + expect(JsonParser::tkBool); + bool result = in_.boolValue(); + return result; +} + +template <typename P> +int32_t JsonDecoder<P>::decodeInt() +{ + parser_.advance(Symbol::sInt); + expect(JsonParser::tkLong); + int64_t result = in_.longValue(); + if (result < INT32_MIN || result > INT32_MAX) { + throw Exception(boost::format("Value out of range for Avro int: %1%") + % result); + } + return static_cast<int32_t>(result); +} + +template <typename P> +int64_t JsonDecoder<P>::decodeLong() +{ + parser_.advance(Symbol::sLong); + expect(JsonParser::tkLong); + int64_t result = in_.longValue(); + return result; +} + +template <typename P> +float JsonDecoder<P>::decodeFloat() +{ + parser_.advance(Symbol::sFloat); + expect(JsonParser::tkDouble); + double result = in_.doubleValue(); + return static_cast<float>(result); +} + +template <typename P> +double JsonDecoder<P>::decodeDouble() +{ + parser_.advance(Symbol::sDouble); + expect(JsonParser::tkDouble); + double result = in_.doubleValue(); + return result; +} + +template <typename P> +void JsonDecoder<P>::decodeString(string& value) +{ + parser_.advance(Symbol::sString); + expect(JsonParser::tkString); + value = in_.stringValue(); +} + +template <typename P> +void JsonDecoder<P>::skipString() +{ + parser_.advance(Symbol::sString); + expect(JsonParser::tkString); +} + +static vector<uint8_t> toBytes(const string& s) +{ + return vector<uint8_t>(s.begin(), s.end()); +} + +template <typename P> +void JsonDecoder<P>::decodeBytes(vector<uint8_t>& value ) +{ + parser_.advance(Symbol::sBytes); + expect(JsonParser::tkString); + value = toBytes(in_.bytesValue()); +} + +template <typename P> +void JsonDecoder<P>::skipBytes() +{ + parser_.advance(Symbol::sBytes); + expect(JsonParser::tkString); +} + +template <typename P> +void JsonDecoder<P>::decodeFixed(size_t n, vector<uint8_t>& value) +{ + parser_.advance(Symbol::sFixed); + parser_.assertSize(n); + expect(JsonParser::tkString); + value = toBytes(in_.bytesValue()); + if (value.size() != n) { + throw Exception("Incorrect value for fixed"); + } +} + +template <typename P> +void JsonDecoder<P>::skipFixed(size_t n) +{ + parser_.advance(Symbol::sFixed); + parser_.assertSize(n); + expect(JsonParser::tkString); + vector<uint8_t> result = toBytes(in_.bytesValue()); + if (result.size() != n) { + throw Exception("Incorrect value for fixed"); + } +} + +template <typename P> +size_t JsonDecoder<P>::decodeEnum() +{ + parser_.advance(Symbol::sEnum); + expect(JsonParser::tkString); + size_t result = parser_.indexForName(in_.stringValue()); + return result; +} + +template <typename P> +size_t JsonDecoder<P>::arrayStart() +{ + parser_.advance(Symbol::sArrayStart); + parser_.pushRepeatCount(0); + expect(JsonParser::tkArrayStart); + return arrayNext(); +} + +template <typename P> +size_t JsonDecoder<P>::arrayNext() +{ + parser_.processImplicitActions(); + if (in_.peek() == JsonParser::tkArrayEnd) { + in_.advance(); + parser_.popRepeater(); + parser_.advance(Symbol::sArrayEnd); + return 0; + } + parser_.nextRepeatCount(1); + return 1; +} + +template<typename P> +void JsonDecoder<P>::skipComposite() +{ + size_t level = 0; + for (; ;) { + switch (in_.advance()) { + case JsonParser::tkArrayStart: + case JsonParser::tkObjectStart: + ++level; + continue; + case JsonParser::tkArrayEnd: + case JsonParser::tkObjectEnd: + if (level == 0) { + return; + } + --level; + continue; + default: + continue; + } + } +} + +template<typename P> +void JsonDecoder<P>::drain() +{ + parser_.processImplicitActions(); + in_.drain(); +} + +template <typename P> +size_t JsonDecoder<P>::skipArray() +{ + parser_.advance(Symbol::sArrayStart); + parser_.pop(); + parser_.advance(Symbol::sArrayEnd); + expect(JsonParser::tkArrayStart); + skipComposite(); + return 0; +} + +template <typename P> +size_t JsonDecoder<P>::mapStart() +{ + parser_.advance(Symbol::sMapStart); + parser_.pushRepeatCount(0); + expect(JsonParser::tkObjectStart); + return mapNext(); +} + +template <typename P> +size_t JsonDecoder<P>::mapNext() +{ + parser_.processImplicitActions(); + if (in_.peek() == JsonParser::tkObjectEnd) { + in_.advance(); + parser_.popRepeater(); + parser_.advance(Symbol::sMapEnd); + return 0; + } + parser_.nextRepeatCount(1); + return 1; +} + +template <typename P> +size_t JsonDecoder<P>::skipMap() +{ + parser_.advance(Symbol::sMapStart); + parser_.pop(); + parser_.advance(Symbol::sMapEnd); + expect(JsonParser::tkObjectStart); + skipComposite(); + return 0; +} + +template <typename P> +size_t JsonDecoder<P>::decodeUnionIndex() +{ + parser_.advance(Symbol::sUnion); + + size_t result; + if (in_.peek() == JsonParser::tkNull) { + result = parser_.indexForName("null"); + } else { + expect(JsonParser::tkObjectStart); + expect(JsonParser::tkString); + result = parser_.indexForName(in_.stringValue()); + } + parser_.selectBranch(result); + return result; +} + +template<typename F = JsonNullFormatter> +class JsonHandler { + JsonGenerator<F>& generator_; +public: + JsonHandler(JsonGenerator<F>& g) : generator_(g) { } + size_t handle(const Symbol& s) { + switch (s.kind()) { + case Symbol::sRecordStart: + generator_.objectStart(); + break; + case Symbol::sRecordEnd: + generator_.objectEnd(); + break; + case Symbol::sField: + generator_.encodeString(s.extra<string>()); + break; + default: + break; + } + return 0; + } +}; + +template <typename P, typename F = JsonNullFormatter> +class JsonEncoder : public Encoder { + JsonGenerator<F> out_; + JsonHandler<F> handler_; + P parser_; + + void init(OutputStream& os); + void flush(); + int64_t byteCount() const; + void encodeNull(); + void encodeBool(bool b); + void encodeInt(int32_t i); + void encodeLong(int64_t l); + void encodeFloat(float f); + void encodeDouble(double d); + void encodeString(const std::string& s); + void encodeBytes(const uint8_t *bytes, size_t len); + void encodeFixed(const uint8_t *bytes, size_t len); + void encodeEnum(size_t e); + void arrayStart(); + void arrayEnd(); + void mapStart(); + void mapEnd(); + void setItemCount(size_t count); + void startItem(); + void encodeUnionIndex(size_t e); +public: + JsonEncoder(const ValidSchema& schema) : + handler_(out_), + parser_(JsonGrammarGenerator().generate(schema), NULL, handler_) { } +}; + +template<typename P, typename F> +void JsonEncoder<P, F>::init(OutputStream& os) +{ + out_.init(os); +} + +template<typename P, typename F> +void JsonEncoder<P, F>::flush() +{ + parser_.processImplicitActions(); + out_.flush(); +} + +template<typename P, typename F> +int64_t JsonEncoder<P, F>::byteCount() const +{ + return out_.byteCount(); +} + +template<typename P, typename F> +void JsonEncoder<P, F>::encodeNull() +{ + parser_.advance(Symbol::sNull); + out_.encodeNull(); +} + +template<typename P, typename F> +void JsonEncoder<P, F>::encodeBool(bool b) +{ + parser_.advance(Symbol::sBool); + out_.encodeBool(b); +} + +template<typename P, typename F> +void JsonEncoder<P, F>::encodeInt(int32_t i) +{ + parser_.advance(Symbol::sInt); + out_.encodeNumber(i); +} + +template<typename P, typename F> +void JsonEncoder<P, F>::encodeLong(int64_t l) +{ + parser_.advance(Symbol::sLong); + out_.encodeNumber(l); +} + +template<typename P, typename F> +void JsonEncoder<P, F>::encodeFloat(float f) +{ + parser_.advance(Symbol::sFloat); + if (f == std::numeric_limits<float>::infinity()) { + out_.encodeString("Infinity"); + } else if (f == -std::numeric_limits<float>::infinity()) { + out_.encodeString("-Infinity"); + } else if (boost::math::isnan(f)) { + out_.encodeString("NaN"); + } else { + out_.encodeNumber(f); + } +} + +template<typename P, typename F> +void JsonEncoder<P, F>::encodeDouble(double d) +{ + parser_.advance(Symbol::sDouble); + if (d == std::numeric_limits<double>::infinity()) { + out_.encodeString("Infinity"); + } else if (d == -std::numeric_limits<double>::infinity()) { + out_.encodeString("-Infinity"); + } else if (boost::math::isnan(d)) { + out_.encodeString("NaN"); + } else { + out_.encodeNumber(d); + } +} + +template<typename P, typename F> +void JsonEncoder<P, F>::encodeString(const std::string& s) +{ + parser_.advance(Symbol::sString); + out_.encodeString(s); +} + +template<typename P, typename F> +void JsonEncoder<P, F>::encodeBytes(const uint8_t *bytes, size_t len) +{ + parser_.advance(Symbol::sBytes); + out_.encodeBinary(bytes, len); +} + +template<typename P, typename F> +void JsonEncoder<P, F>::encodeFixed(const uint8_t *bytes, size_t len) +{ + parser_.advance(Symbol::sFixed); + parser_.assertSize(len); + out_.encodeBinary(bytes, len); +} + +template<typename P, typename F> +void JsonEncoder<P, F>::encodeEnum(size_t e) +{ + parser_.advance(Symbol::sEnum); + const string& s = parser_.nameForIndex(e); + out_.encodeString(s); +} + +template<typename P, typename F> +void JsonEncoder<P, F>::arrayStart() +{ + parser_.advance(Symbol::sArrayStart); + parser_.pushRepeatCount(0); + out_.arrayStart(); +} + +template<typename P, typename F> +void JsonEncoder<P, F>::arrayEnd() +{ + parser_.popRepeater(); + parser_.advance(Symbol::sArrayEnd); + out_.arrayEnd(); +} + +template<typename P, typename F> +void JsonEncoder<P, F>::mapStart() +{ + parser_.advance(Symbol::sMapStart); + parser_.pushRepeatCount(0); + out_.objectStart(); +} + +template<typename P, typename F> +void JsonEncoder<P, F>::mapEnd() +{ + parser_.popRepeater(); + parser_.advance(Symbol::sMapEnd); + out_.objectEnd(); +} + +template<typename P, typename F> +void JsonEncoder<P, F>::setItemCount(size_t count) +{ + parser_.nextRepeatCount(count); +} + +template<typename P, typename F> +void JsonEncoder<P, F>::startItem() +{ + parser_.processImplicitActions(); + if (parser_.top() != Symbol::sRepeater) { + throw Exception("startItem at not an item boundary"); + } +} + +template<typename P, typename F> +void JsonEncoder<P, F>::encodeUnionIndex(size_t e) +{ + parser_.advance(Symbol::sUnion); + + const std::string name = parser_.nameForIndex(e); + + if (name != "null") { + out_.objectStart(); + out_.encodeString(name); + } + parser_.selectBranch(e); +} + +} // namespace parsing + +DecoderPtr jsonDecoder(const ValidSchema& s) +{ + return std::make_shared<parsing::JsonDecoder< + parsing::SimpleParser<parsing::JsonDecoderHandler> > >(s); +} + +EncoderPtr jsonEncoder(const ValidSchema& schema) +{ + return std::make_shared<parsing::JsonEncoder< + parsing::SimpleParser<parsing::JsonHandler<avro::json::JsonNullFormatter> >, avro::json::JsonNullFormatter> >(schema); +} + +EncoderPtr jsonPrettyEncoder(const ValidSchema& schema) +{ + return std::make_shared<parsing::JsonEncoder< + parsing::SimpleParser<parsing::JsonHandler<avro::json::JsonPrettyFormatter> >, avro::json::JsonPrettyFormatter> >(schema); +} + +} // namespace avro + diff --git a/contrib/libs/apache/avro/impl/parsing/ResolvingDecoder.cc b/contrib/libs/apache/avro/impl/parsing/ResolvingDecoder.cc index f6dbacabcf..2e33eaa8d0 100644 --- a/contrib/libs/apache/avro/impl/parsing/ResolvingDecoder.cc +++ b/contrib/libs/apache/avro/impl/parsing/ResolvingDecoder.cc @@ -1,740 +1,740 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#define __STDC_LIMIT_MACROS - -#include <string> -#include <stack> -#include <map> -#include <algorithm> -#include <memory> -#include <ctype.h> - -#include "ValidatingCodec.hh" -#include "Symbol.hh" -#include "Types.hh" -#include "ValidSchema.hh" -#include "Decoder.hh" -#include "Encoder.hh" -#include "NodeImpl.hh" -#include "Generic.hh" -#include "Stream.hh" - -namespace avro { - -using std::make_shared; - -namespace parsing { - -using std::shared_ptr; -using std::static_pointer_cast; -using std::make_shared; - -using std::unique_ptr; -using std::map; -using std::pair; -using std::vector; -using std::string; -using std::reverse; -using std::ostringstream; -using std::istringstream; -using std::stack; -using std::find_if; -using std::make_pair; - -typedef pair<NodePtr, NodePtr> NodePair; - -class ResolvingGrammarGenerator : public ValidatingGrammarGenerator { - ProductionPtr doGenerate2(const NodePtr& writer, - const NodePtr& reader, map<NodePair, ProductionPtr> &m, - map<NodePtr, ProductionPtr> &m2); - ProductionPtr resolveRecords(const NodePtr& writer, - const NodePtr& reader, map<NodePair, ProductionPtr> &m, - map<NodePtr, ProductionPtr> &m2); - ProductionPtr resolveUnion(const NodePtr& writer, - const NodePtr& reader, map<NodePair, ProductionPtr> &m, - map<NodePtr, ProductionPtr> &m2); - - static vector<pair<string, size_t> > fields(const NodePtr& n) { - vector<pair<string, size_t> > result; - size_t c = n->names(); - for (size_t i = 0; i < c; ++i) { - result.push_back(make_pair(n->nameAt(i), i)); - } - return result; - } - - static int bestBranch(const NodePtr& writer, const NodePtr& reader); - - ProductionPtr getWriterProduction(const NodePtr& n, - map<NodePtr, ProductionPtr>& m2); - -public: - Symbol generate( - const ValidSchema& writer, const ValidSchema& reader); -}; - -Symbol ResolvingGrammarGenerator::generate( - const ValidSchema& writer, const ValidSchema& reader) { - map<NodePtr, ProductionPtr> m2; - - const NodePtr& rr = reader.root(); - const NodePtr& rw = writer.root(); - ProductionPtr backup = ValidatingGrammarGenerator::doGenerate(rw, m2); - fixup(backup, m2); - - map<NodePair, ProductionPtr> m; - ProductionPtr main = doGenerate2(rw, rr, m, m2); - fixup(main, m); - return Symbol::rootSymbol(main, backup); -} - -int ResolvingGrammarGenerator::bestBranch(const NodePtr& writer, - const NodePtr& reader) -{ - Type t = writer->type(); - - const size_t c = reader->leaves(); - for (size_t j = 0; j < c; ++j) { - NodePtr r = reader->leafAt(j); - if (r->type() == AVRO_SYMBOLIC) { - r = resolveSymbol(r); - } - if (t == r->type()) { - if (r->hasName()) { - if (r->name() == writer->name()) { - return j; - } - } else { - return j; - } - } - } - - for (size_t j = 0; j < c; ++j) { - const NodePtr& r = reader->leafAt(j); - Type rt = r->type(); - switch (t) { - case AVRO_INT: - if (rt == AVRO_LONG || rt == AVRO_DOUBLE || rt == AVRO_FLOAT) { - return j; - } - break; - case AVRO_LONG: - case AVRO_FLOAT: - if (rt == AVRO_DOUBLE) { - return j; - } - break; - default: - break; - } - } - return -1; -} - -static shared_ptr<vector<uint8_t> > getAvroBinary( - const GenericDatum& defaultValue) -{ - EncoderPtr e = binaryEncoder(); - unique_ptr<OutputStream> os = memoryOutputStream(); - e->init(*os); - GenericWriter::write(*e, defaultValue); - e->flush(); - return snapshot(*os); -} - -template<typename T1, typename T2> -struct equalsFirst -{ - const T1& v_; - equalsFirst(const T1& v) : v_(v) { } - bool operator()(const pair<T1, T2>& p) { - return p.first == v_; - } -}; - -ProductionPtr ResolvingGrammarGenerator::getWriterProduction( - const NodePtr& n, map<NodePtr, ProductionPtr>& m2) -{ - const NodePtr& nn = (n->type() == AVRO_SYMBOLIC) ? - static_cast<const NodeSymbolic& >(*n).getNode() : n; - map<NodePtr, ProductionPtr>::const_iterator it2 = m2.find(nn); - if (it2 != m2.end()) { - return it2->second; - } else { - ProductionPtr result = ValidatingGrammarGenerator::doGenerate(nn, m2); - fixup(result, m2); - return result; - } -} - -ProductionPtr ResolvingGrammarGenerator::resolveRecords( - const NodePtr& writer, const NodePtr& reader, - map<NodePair, ProductionPtr>& m, - map<NodePtr, ProductionPtr>& m2) -{ - ProductionPtr result = make_shared<Production>(); - - vector<pair<string, size_t> > wf = fields(writer); - vector<pair<string, size_t> > rf = fields(reader); - vector<size_t> fieldOrder; - fieldOrder.reserve(reader->names()); - - /* - * We look for all writer fields in the reader. If found, recursively - * resolve the corresponding fields. Then erase the reader field. - * If no matching field is found for reader, arrange to skip the writer - * field. - */ - for (vector<pair<string, size_t> >::const_iterator it = wf.begin(); - it != wf.end(); ++it) { - vector<pair<string, size_t> >::iterator it2 = - find_if(rf.begin(), rf.end(), - equalsFirst<string, size_t>(it->first)); - if (it2 != rf.end()) { - ProductionPtr p = doGenerate2(writer->leafAt(it->second), - reader->leafAt(it2->second), m, m2); - copy(p->rbegin(), p->rend(), back_inserter(*result)); - fieldOrder.push_back(it2->second); - rf.erase(it2); - } else { - ProductionPtr p = getWriterProduction( - writer->leafAt(it->second), m2); - result->push_back(Symbol::skipStart()); - if (p->size() == 1) { - result->push_back((*p)[0]); - } else { - result->push_back(Symbol::indirect(p)); - } - } - } - - /* - * Examine the reader fields left out, (i.e. those didn't have corresponding - * writer field). - */ - for (vector<pair<string, size_t> >::const_iterator it = rf.begin(); - it != rf.end(); ++it) { - - NodePtr s = reader->leafAt(it->second); - fieldOrder.push_back(it->second); - - if (s->type() == AVRO_SYMBOLIC) { - s = resolveSymbol(s); - } - shared_ptr<vector<uint8_t> > defaultBinary = - getAvroBinary(reader->defaultValueAt(it->second)); - result->push_back(Symbol::defaultStartAction(defaultBinary)); - map<NodePair, shared_ptr<Production> >::const_iterator it2 = - m.find(NodePair(s, s)); - ProductionPtr p = (it2 == m.end()) ? - doGenerate2(s, s, m, m2) : it2->second; - copy(p->rbegin(), p->rend(), back_inserter(*result)); - result->push_back(Symbol::defaultEndAction()); - } - reverse(result->begin(), result->end()); - result->push_back(Symbol::sizeListAction(fieldOrder)); - result->push_back(Symbol::recordAction()); - - return result; - -} - -ProductionPtr ResolvingGrammarGenerator::resolveUnion( - const NodePtr& writer, const NodePtr& reader, - map<NodePair, ProductionPtr>& m, - map<NodePtr, ProductionPtr>& m2) -{ - vector<ProductionPtr> v; - size_t c = writer->leaves(); - v.reserve(c); - for (size_t i = 0; i < c; ++i) { - ProductionPtr p = doGenerate2(writer->leafAt(i), reader, m, m2); - v.push_back(p); - } - ProductionPtr result = make_shared<Production>(); - result->push_back(Symbol::alternative(v)); - result->push_back(Symbol::writerUnionAction()); - return result; -} - -ProductionPtr ResolvingGrammarGenerator::doGenerate2( - const NodePtr& w, const NodePtr& r, - map<NodePair, ProductionPtr> &m, - map<NodePtr, ProductionPtr> &m2) -{ - const NodePtr writer = w->type() == AVRO_SYMBOLIC ? resolveSymbol(w) : w; - const NodePtr reader = r->type() == AVRO_SYMBOLIC ? resolveSymbol(r) : r; - Type writerType = writer->type(); - Type readerType = reader->type(); - - if (writerType == readerType) { - switch (writerType) { - case AVRO_NULL: - return make_shared<Production>(1, Symbol::nullSymbol()); - case AVRO_BOOL: - return make_shared<Production>(1, Symbol::boolSymbol()); - case AVRO_INT: - return make_shared<Production>(1, Symbol::intSymbol()); - case AVRO_LONG: - return make_shared<Production>(1, Symbol::longSymbol()); - case AVRO_FLOAT: - return make_shared<Production>(1, Symbol::floatSymbol()); - case AVRO_DOUBLE: - return make_shared<Production>(1, Symbol::doubleSymbol()); - case AVRO_STRING: - return make_shared<Production>(1, Symbol::stringSymbol()); - case AVRO_BYTES: - return make_shared<Production>(1, Symbol::bytesSymbol()); - case AVRO_FIXED: - if (writer->name() == reader->name() && - writer->fixedSize() == reader->fixedSize()) { - ProductionPtr result = make_shared<Production>(); - result->push_back(Symbol::sizeCheckSymbol(reader->fixedSize())); - result->push_back(Symbol::fixedSymbol()); - m[make_pair(writer, reader)] = result; - return result; - } - break; - case AVRO_RECORD: - if (writer->name() == reader->name()) { - const pair<NodePtr, NodePtr> key(writer, reader); - map<NodePair, ProductionPtr>::const_iterator kp = m.find(key); - if (kp != m.end()) { - return (kp->second) ? kp->second : - make_shared<Production>(1, Symbol::placeholder(key)); - } - m[key] = ProductionPtr(); - ProductionPtr result = resolveRecords(writer, reader, m, m2); - m[key] = result; - return make_shared<Production>(1, Symbol::indirect(result)); - } - break; - - case AVRO_ENUM: - if (writer->name() == reader->name()) { - ProductionPtr result = make_shared<Production>(); - result->push_back(Symbol::enumAdjustSymbol(writer, reader)); - result->push_back(Symbol::enumSymbol()); - m[make_pair(writer, reader)] = result; - return result; - } - break; - - case AVRO_ARRAY: - { - ProductionPtr p = getWriterProduction(writer->leafAt(0), m2); - ProductionPtr p2 = doGenerate2(writer->leafAt(0), reader->leafAt(0), m, m2); - ProductionPtr result = make_shared<Production>(); - result->push_back(Symbol::arrayEndSymbol()); - result->push_back(Symbol::repeater(p2, p, true)); - result->push_back(Symbol::arrayStartSymbol()); - return result; - } - case AVRO_MAP: - { - ProductionPtr pp = - doGenerate2(writer->leafAt(1),reader->leafAt(1), m, m2); - ProductionPtr v(new Production(*pp)); - v->push_back(Symbol::stringSymbol()); - - ProductionPtr pp2 = getWriterProduction(writer->leafAt(1), m2); - ProductionPtr v2(new Production(*pp2)); - - v2->push_back(Symbol::stringSymbol()); - - ProductionPtr result = make_shared<Production>(); - result->push_back(Symbol::mapEndSymbol()); - result->push_back(Symbol::repeater(v, v2, false)); - result->push_back(Symbol::mapStartSymbol()); - return result; - } - case AVRO_UNION: - return resolveUnion(writer, reader, m, m2); - case AVRO_SYMBOLIC: - { - shared_ptr<NodeSymbolic> w = - static_pointer_cast<NodeSymbolic>(writer); - shared_ptr<NodeSymbolic> r = - static_pointer_cast<NodeSymbolic>(reader); - NodePair p(w->getNode(), r->getNode()); - map<NodePair, ProductionPtr>::iterator it = m.find(p); - if (it != m.end() && it->second) { - return it->second; - } else { - m[p] = ProductionPtr(); - return make_shared<Production>(1, Symbol::placeholder(p)); - } - } - default: - throw Exception("Unknown node type"); - } - } else if (writerType == AVRO_UNION) { - return resolveUnion(writer, reader, m, m2); - } else { - switch (readerType) { - case AVRO_LONG: - if (writerType == AVRO_INT) { - return make_shared<Production>(1, - Symbol::resolveSymbol(Symbol::sInt, Symbol::sLong)); - } - break; - case AVRO_FLOAT: - if (writerType == AVRO_INT || writerType == AVRO_LONG) { - return make_shared<Production>(1, - Symbol::resolveSymbol(writerType == AVRO_INT ? - Symbol::sInt : Symbol::sLong, Symbol::sFloat)); - } - break; - case AVRO_DOUBLE: - if (writerType == AVRO_INT || writerType == AVRO_LONG - || writerType == AVRO_FLOAT) { - return make_shared<Production>(1, - Symbol::resolveSymbol(writerType == AVRO_INT ? - Symbol::sInt : writerType == AVRO_LONG ? - Symbol::sLong : Symbol::sFloat, Symbol::sDouble)); - } - break; - - case AVRO_UNION: - { - int j = bestBranch(writer, reader); - if (j >= 0) { - ProductionPtr p = doGenerate2(writer, reader->leafAt(j), m, m2); - ProductionPtr result = make_shared<Production>(); - result->push_back(Symbol::unionAdjustSymbol(j, p)); - result->push_back(Symbol::unionSymbol()); - return result; - } - } - break; - case AVRO_NULL: - case AVRO_BOOL: - case AVRO_INT: - case AVRO_STRING: - case AVRO_BYTES: - case AVRO_ENUM: - case AVRO_ARRAY: - case AVRO_MAP: - case AVRO_RECORD: - break; - default: - throw Exception("Unknown node type"); - } - } - return make_shared<Production>(1, Symbol::error(writer, reader)); -} - -class ResolvingDecoderHandler { - shared_ptr<vector<uint8_t> > defaultData_; - unique_ptr<InputStream> inp_; - DecoderPtr backup_; - DecoderPtr& base_; - const DecoderPtr binDecoder; - public: - ResolvingDecoderHandler(DecoderPtr& base) : base_(base), - binDecoder(binaryDecoder()) { } - size_t handle(const Symbol& s) { - switch (s.kind()) { - case Symbol::sWriterUnion: - return base_->decodeUnionIndex(); - case Symbol::sDefaultStart: - defaultData_ = s.extra<shared_ptr<vector<uint8_t> > >(); - backup_ = base_; - inp_ = memoryInputStream(&(*defaultData_)[0], defaultData_->size()); - base_ = binDecoder; - base_->init(*inp_); - return 0; - case Symbol::sDefaultEnd: - base_= backup_; - backup_.reset(); - return 0; - default: - return 0; - } - } - - void reset() - { - if (backup_ != NULL) - { - base_= backup_; - backup_.reset(); - } - } -}; - -template <typename Parser> -class ResolvingDecoderImpl : public ResolvingDecoder -{ - DecoderPtr base_; - ResolvingDecoderHandler handler_; - Parser parser_; - - void init(InputStream& is); - void decodeNull(); - bool decodeBool(); - int32_t decodeInt(); - int64_t decodeLong(); - float decodeFloat(); - double decodeDouble(); - void decodeString(string& value); - void skipString(); - void decodeBytes(vector<uint8_t>& value); - void skipBytes(); - void decodeFixed(size_t n, vector<uint8_t>& value); - void skipFixed(size_t n); - size_t decodeEnum(); - size_t arrayStart(); - size_t arrayNext(); - size_t skipArray(); - size_t mapStart(); - size_t mapNext(); - size_t skipMap(); - size_t decodeUnionIndex(); - const vector<size_t>& fieldOrder(); - void drain() { - parser_.processImplicitActions(); - base_->drain(); - } -public: - ResolvingDecoderImpl(const ValidSchema& writer, const ValidSchema& reader, - const DecoderPtr& base) : - base_(base), - handler_(base_), - parser_(ResolvingGrammarGenerator().generate(writer, reader), - &(*base_), handler_) - { - } -}; - -template <typename P> -void ResolvingDecoderImpl<P>::init(InputStream& is) -{ - handler_.reset(); - base_->init(is); - parser_.reset(); -} - -template <typename P> -void ResolvingDecoderImpl<P>::decodeNull() -{ - parser_.advance(Symbol::sNull); - base_->decodeNull(); -} - -template <typename P> -bool ResolvingDecoderImpl<P>::decodeBool() -{ - parser_.advance(Symbol::sBool); - return base_->decodeBool(); -} - -template <typename P> -int32_t ResolvingDecoderImpl<P>::decodeInt() -{ - parser_.advance(Symbol::sInt); - return base_->decodeInt(); -} - -template <typename P> -int64_t ResolvingDecoderImpl<P>::decodeLong() -{ - Symbol::Kind k = parser_.advance(Symbol::sLong); - return k == Symbol::sInt ? base_->decodeInt() : base_->decodeLong(); -} - -template <typename P> -float ResolvingDecoderImpl<P>::decodeFloat() -{ - Symbol::Kind k = parser_.advance(Symbol::sFloat); - return k == Symbol::sInt ? base_->decodeInt() : - k == Symbol::sLong ? base_->decodeLong() : - base_->decodeFloat(); -} - -template <typename P> -double ResolvingDecoderImpl<P>::decodeDouble() -{ - Symbol::Kind k = parser_.advance(Symbol::sDouble); - return k == Symbol::sInt ? base_->decodeInt() : - k == Symbol::sLong ? base_->decodeLong() : - k == Symbol::sFloat ? base_->decodeFloat() : - base_->decodeDouble(); -} - -template <typename P> -void ResolvingDecoderImpl<P>::decodeString(string& value) -{ - parser_.advance(Symbol::sString); - base_->decodeString(value); -} - -template <typename P> -void ResolvingDecoderImpl<P>::skipString() -{ - parser_.advance(Symbol::sString); - base_->skipString(); -} - -template <typename P> -void ResolvingDecoderImpl<P>::decodeBytes(vector<uint8_t>& value) -{ - parser_.advance(Symbol::sBytes); - base_->decodeBytes(value); -} - -template <typename P> -void ResolvingDecoderImpl<P>::skipBytes() -{ - parser_.advance(Symbol::sBytes); - base_->skipBytes(); -} - -template <typename P> -void ResolvingDecoderImpl<P>::decodeFixed(size_t n, vector<uint8_t>& value) -{ - parser_.advance(Symbol::sFixed); - parser_.assertSize(n); - return base_->decodeFixed(n, value); -} - -template <typename P> -void ResolvingDecoderImpl<P>::skipFixed(size_t n) -{ - parser_.advance(Symbol::sFixed); - parser_.assertSize(n); - base_->skipFixed(n); -} - -template <typename P> -size_t ResolvingDecoderImpl<P>::decodeEnum() -{ - parser_.advance(Symbol::sEnum); - size_t n = base_->decodeEnum(); - return parser_.enumAdjust(n); -} - -template <typename P> -size_t ResolvingDecoderImpl<P>::arrayStart() -{ - parser_.advance(Symbol::sArrayStart); - size_t result = base_->arrayStart(); - parser_.pushRepeatCount(result); - if (result == 0) { - parser_.popRepeater(); - parser_.advance(Symbol::sArrayEnd); - } - return result; -} - -template <typename P> -size_t ResolvingDecoderImpl<P>::arrayNext() -{ - parser_.processImplicitActions(); - size_t result = base_->arrayNext(); - parser_.nextRepeatCount(result); - if (result == 0) { - parser_.popRepeater(); - parser_.advance(Symbol::sArrayEnd); - } - return result; -} - -template <typename P> -size_t ResolvingDecoderImpl<P>::skipArray() -{ - parser_.advance(Symbol::sArrayStart); - size_t n = base_->skipArray(); - if (n == 0) { - parser_.pop(); - } else { - parser_.pushRepeatCount(n); - parser_.skip(*base_); - } - parser_.advance(Symbol::sArrayEnd); - return 0; -} - -template <typename P> -size_t ResolvingDecoderImpl<P>::mapStart() -{ - parser_.advance(Symbol::sMapStart); - size_t result = base_->mapStart(); - parser_.pushRepeatCount(result); - if (result == 0) { - parser_.popRepeater(); - parser_.advance(Symbol::sMapEnd); - } - return result; -} - -template <typename P> -size_t ResolvingDecoderImpl<P>::mapNext() -{ - parser_.processImplicitActions(); - size_t result = base_->mapNext(); - parser_.nextRepeatCount(result); - if (result == 0) { - parser_.popRepeater(); - parser_.advance(Symbol::sMapEnd); - } - return result; -} - -template <typename P> -size_t ResolvingDecoderImpl<P>::skipMap() -{ - parser_.advance(Symbol::sMapStart); - size_t n = base_->skipMap(); - if (n == 0) { - parser_.pop(); - } else { - parser_.pushRepeatCount(n); - parser_.skip(*base_); - } - parser_.advance(Symbol::sMapEnd); - return 0; -} - -template <typename P> -size_t ResolvingDecoderImpl<P>::decodeUnionIndex() -{ - parser_.advance(Symbol::sUnion); - return parser_.unionAdjust(); -} - -template <typename P> -const vector<size_t>& ResolvingDecoderImpl<P>::fieldOrder() -{ - parser_.advance(Symbol::sRecord); - return parser_.sizeList(); -} - -} // namespace parsing - -ResolvingDecoderPtr resolvingDecoder(const ValidSchema& writer, - const ValidSchema& reader, const DecoderPtr& base) { - return make_shared<parsing::ResolvingDecoderImpl - <parsing::SimpleParser<parsing::ResolvingDecoderHandler> > >( - writer, reader, base); -} - -} // namespace avro - +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define __STDC_LIMIT_MACROS + +#include <string> +#include <stack> +#include <map> +#include <algorithm> +#include <memory> +#include <ctype.h> + +#include "ValidatingCodec.hh" +#include "Symbol.hh" +#include "Types.hh" +#include "ValidSchema.hh" +#include "Decoder.hh" +#include "Encoder.hh" +#include "NodeImpl.hh" +#include "Generic.hh" +#include "Stream.hh" + +namespace avro { + +using std::make_shared; + +namespace parsing { + +using std::shared_ptr; +using std::static_pointer_cast; +using std::make_shared; + +using std::unique_ptr; +using std::map; +using std::pair; +using std::vector; +using std::string; +using std::reverse; +using std::ostringstream; +using std::istringstream; +using std::stack; +using std::find_if; +using std::make_pair; + +typedef pair<NodePtr, NodePtr> NodePair; + +class ResolvingGrammarGenerator : public ValidatingGrammarGenerator { + ProductionPtr doGenerate2(const NodePtr& writer, + const NodePtr& reader, map<NodePair, ProductionPtr> &m, + map<NodePtr, ProductionPtr> &m2); + ProductionPtr resolveRecords(const NodePtr& writer, + const NodePtr& reader, map<NodePair, ProductionPtr> &m, + map<NodePtr, ProductionPtr> &m2); + ProductionPtr resolveUnion(const NodePtr& writer, + const NodePtr& reader, map<NodePair, ProductionPtr> &m, + map<NodePtr, ProductionPtr> &m2); + + static vector<pair<string, size_t> > fields(const NodePtr& n) { + vector<pair<string, size_t> > result; + size_t c = n->names(); + for (size_t i = 0; i < c; ++i) { + result.push_back(make_pair(n->nameAt(i), i)); + } + return result; + } + + static int bestBranch(const NodePtr& writer, const NodePtr& reader); + + ProductionPtr getWriterProduction(const NodePtr& n, + map<NodePtr, ProductionPtr>& m2); + +public: + Symbol generate( + const ValidSchema& writer, const ValidSchema& reader); +}; + +Symbol ResolvingGrammarGenerator::generate( + const ValidSchema& writer, const ValidSchema& reader) { + map<NodePtr, ProductionPtr> m2; + + const NodePtr& rr = reader.root(); + const NodePtr& rw = writer.root(); + ProductionPtr backup = ValidatingGrammarGenerator::doGenerate(rw, m2); + fixup(backup, m2); + + map<NodePair, ProductionPtr> m; + ProductionPtr main = doGenerate2(rw, rr, m, m2); + fixup(main, m); + return Symbol::rootSymbol(main, backup); +} + +int ResolvingGrammarGenerator::bestBranch(const NodePtr& writer, + const NodePtr& reader) +{ + Type t = writer->type(); + + const size_t c = reader->leaves(); + for (size_t j = 0; j < c; ++j) { + NodePtr r = reader->leafAt(j); + if (r->type() == AVRO_SYMBOLIC) { + r = resolveSymbol(r); + } + if (t == r->type()) { + if (r->hasName()) { + if (r->name() == writer->name()) { + return j; + } + } else { + return j; + } + } + } + + for (size_t j = 0; j < c; ++j) { + const NodePtr& r = reader->leafAt(j); + Type rt = r->type(); + switch (t) { + case AVRO_INT: + if (rt == AVRO_LONG || rt == AVRO_DOUBLE || rt == AVRO_FLOAT) { + return j; + } + break; + case AVRO_LONG: + case AVRO_FLOAT: + if (rt == AVRO_DOUBLE) { + return j; + } + break; + default: + break; + } + } + return -1; +} + +static shared_ptr<vector<uint8_t> > getAvroBinary( + const GenericDatum& defaultValue) +{ + EncoderPtr e = binaryEncoder(); + unique_ptr<OutputStream> os = memoryOutputStream(); + e->init(*os); + GenericWriter::write(*e, defaultValue); + e->flush(); + return snapshot(*os); +} + +template<typename T1, typename T2> +struct equalsFirst +{ + const T1& v_; + equalsFirst(const T1& v) : v_(v) { } + bool operator()(const pair<T1, T2>& p) { + return p.first == v_; + } +}; + +ProductionPtr ResolvingGrammarGenerator::getWriterProduction( + const NodePtr& n, map<NodePtr, ProductionPtr>& m2) +{ + const NodePtr& nn = (n->type() == AVRO_SYMBOLIC) ? + static_cast<const NodeSymbolic& >(*n).getNode() : n; + map<NodePtr, ProductionPtr>::const_iterator it2 = m2.find(nn); + if (it2 != m2.end()) { + return it2->second; + } else { + ProductionPtr result = ValidatingGrammarGenerator::doGenerate(nn, m2); + fixup(result, m2); + return result; + } +} + +ProductionPtr ResolvingGrammarGenerator::resolveRecords( + const NodePtr& writer, const NodePtr& reader, + map<NodePair, ProductionPtr>& m, + map<NodePtr, ProductionPtr>& m2) +{ + ProductionPtr result = make_shared<Production>(); + + vector<pair<string, size_t> > wf = fields(writer); + vector<pair<string, size_t> > rf = fields(reader); + vector<size_t> fieldOrder; + fieldOrder.reserve(reader->names()); + + /* + * We look for all writer fields in the reader. If found, recursively + * resolve the corresponding fields. Then erase the reader field. + * If no matching field is found for reader, arrange to skip the writer + * field. + */ + for (vector<pair<string, size_t> >::const_iterator it = wf.begin(); + it != wf.end(); ++it) { + vector<pair<string, size_t> >::iterator it2 = + find_if(rf.begin(), rf.end(), + equalsFirst<string, size_t>(it->first)); + if (it2 != rf.end()) { + ProductionPtr p = doGenerate2(writer->leafAt(it->second), + reader->leafAt(it2->second), m, m2); + copy(p->rbegin(), p->rend(), back_inserter(*result)); + fieldOrder.push_back(it2->second); + rf.erase(it2); + } else { + ProductionPtr p = getWriterProduction( + writer->leafAt(it->second), m2); + result->push_back(Symbol::skipStart()); + if (p->size() == 1) { + result->push_back((*p)[0]); + } else { + result->push_back(Symbol::indirect(p)); + } + } + } + + /* + * Examine the reader fields left out, (i.e. those didn't have corresponding + * writer field). + */ + for (vector<pair<string, size_t> >::const_iterator it = rf.begin(); + it != rf.end(); ++it) { + + NodePtr s = reader->leafAt(it->second); + fieldOrder.push_back(it->second); + + if (s->type() == AVRO_SYMBOLIC) { + s = resolveSymbol(s); + } + shared_ptr<vector<uint8_t> > defaultBinary = + getAvroBinary(reader->defaultValueAt(it->second)); + result->push_back(Symbol::defaultStartAction(defaultBinary)); + map<NodePair, shared_ptr<Production> >::const_iterator it2 = + m.find(NodePair(s, s)); + ProductionPtr p = (it2 == m.end()) ? + doGenerate2(s, s, m, m2) : it2->second; + copy(p->rbegin(), p->rend(), back_inserter(*result)); + result->push_back(Symbol::defaultEndAction()); + } + reverse(result->begin(), result->end()); + result->push_back(Symbol::sizeListAction(fieldOrder)); + result->push_back(Symbol::recordAction()); + + return result; + +} + +ProductionPtr ResolvingGrammarGenerator::resolveUnion( + const NodePtr& writer, const NodePtr& reader, + map<NodePair, ProductionPtr>& m, + map<NodePtr, ProductionPtr>& m2) +{ + vector<ProductionPtr> v; + size_t c = writer->leaves(); + v.reserve(c); + for (size_t i = 0; i < c; ++i) { + ProductionPtr p = doGenerate2(writer->leafAt(i), reader, m, m2); + v.push_back(p); + } + ProductionPtr result = make_shared<Production>(); + result->push_back(Symbol::alternative(v)); + result->push_back(Symbol::writerUnionAction()); + return result; +} + +ProductionPtr ResolvingGrammarGenerator::doGenerate2( + const NodePtr& w, const NodePtr& r, + map<NodePair, ProductionPtr> &m, + map<NodePtr, ProductionPtr> &m2) +{ + const NodePtr writer = w->type() == AVRO_SYMBOLIC ? resolveSymbol(w) : w; + const NodePtr reader = r->type() == AVRO_SYMBOLIC ? resolveSymbol(r) : r; + Type writerType = writer->type(); + Type readerType = reader->type(); + + if (writerType == readerType) { + switch (writerType) { + case AVRO_NULL: + return make_shared<Production>(1, Symbol::nullSymbol()); + case AVRO_BOOL: + return make_shared<Production>(1, Symbol::boolSymbol()); + case AVRO_INT: + return make_shared<Production>(1, Symbol::intSymbol()); + case AVRO_LONG: + return make_shared<Production>(1, Symbol::longSymbol()); + case AVRO_FLOAT: + return make_shared<Production>(1, Symbol::floatSymbol()); + case AVRO_DOUBLE: + return make_shared<Production>(1, Symbol::doubleSymbol()); + case AVRO_STRING: + return make_shared<Production>(1, Symbol::stringSymbol()); + case AVRO_BYTES: + return make_shared<Production>(1, Symbol::bytesSymbol()); + case AVRO_FIXED: + if (writer->name() == reader->name() && + writer->fixedSize() == reader->fixedSize()) { + ProductionPtr result = make_shared<Production>(); + result->push_back(Symbol::sizeCheckSymbol(reader->fixedSize())); + result->push_back(Symbol::fixedSymbol()); + m[make_pair(writer, reader)] = result; + return result; + } + break; + case AVRO_RECORD: + if (writer->name() == reader->name()) { + const pair<NodePtr, NodePtr> key(writer, reader); + map<NodePair, ProductionPtr>::const_iterator kp = m.find(key); + if (kp != m.end()) { + return (kp->second) ? kp->second : + make_shared<Production>(1, Symbol::placeholder(key)); + } + m[key] = ProductionPtr(); + ProductionPtr result = resolveRecords(writer, reader, m, m2); + m[key] = result; + return make_shared<Production>(1, Symbol::indirect(result)); + } + break; + + case AVRO_ENUM: + if (writer->name() == reader->name()) { + ProductionPtr result = make_shared<Production>(); + result->push_back(Symbol::enumAdjustSymbol(writer, reader)); + result->push_back(Symbol::enumSymbol()); + m[make_pair(writer, reader)] = result; + return result; + } + break; + + case AVRO_ARRAY: + { + ProductionPtr p = getWriterProduction(writer->leafAt(0), m2); + ProductionPtr p2 = doGenerate2(writer->leafAt(0), reader->leafAt(0), m, m2); + ProductionPtr result = make_shared<Production>(); + result->push_back(Symbol::arrayEndSymbol()); + result->push_back(Symbol::repeater(p2, p, true)); + result->push_back(Symbol::arrayStartSymbol()); + return result; + } + case AVRO_MAP: + { + ProductionPtr pp = + doGenerate2(writer->leafAt(1),reader->leafAt(1), m, m2); + ProductionPtr v(new Production(*pp)); + v->push_back(Symbol::stringSymbol()); + + ProductionPtr pp2 = getWriterProduction(writer->leafAt(1), m2); + ProductionPtr v2(new Production(*pp2)); + + v2->push_back(Symbol::stringSymbol()); + + ProductionPtr result = make_shared<Production>(); + result->push_back(Symbol::mapEndSymbol()); + result->push_back(Symbol::repeater(v, v2, false)); + result->push_back(Symbol::mapStartSymbol()); + return result; + } + case AVRO_UNION: + return resolveUnion(writer, reader, m, m2); + case AVRO_SYMBOLIC: + { + shared_ptr<NodeSymbolic> w = + static_pointer_cast<NodeSymbolic>(writer); + shared_ptr<NodeSymbolic> r = + static_pointer_cast<NodeSymbolic>(reader); + NodePair p(w->getNode(), r->getNode()); + map<NodePair, ProductionPtr>::iterator it = m.find(p); + if (it != m.end() && it->second) { + return it->second; + } else { + m[p] = ProductionPtr(); + return make_shared<Production>(1, Symbol::placeholder(p)); + } + } + default: + throw Exception("Unknown node type"); + } + } else if (writerType == AVRO_UNION) { + return resolveUnion(writer, reader, m, m2); + } else { + switch (readerType) { + case AVRO_LONG: + if (writerType == AVRO_INT) { + return make_shared<Production>(1, + Symbol::resolveSymbol(Symbol::sInt, Symbol::sLong)); + } + break; + case AVRO_FLOAT: + if (writerType == AVRO_INT || writerType == AVRO_LONG) { + return make_shared<Production>(1, + Symbol::resolveSymbol(writerType == AVRO_INT ? + Symbol::sInt : Symbol::sLong, Symbol::sFloat)); + } + break; + case AVRO_DOUBLE: + if (writerType == AVRO_INT || writerType == AVRO_LONG + || writerType == AVRO_FLOAT) { + return make_shared<Production>(1, + Symbol::resolveSymbol(writerType == AVRO_INT ? + Symbol::sInt : writerType == AVRO_LONG ? + Symbol::sLong : Symbol::sFloat, Symbol::sDouble)); + } + break; + + case AVRO_UNION: + { + int j = bestBranch(writer, reader); + if (j >= 0) { + ProductionPtr p = doGenerate2(writer, reader->leafAt(j), m, m2); + ProductionPtr result = make_shared<Production>(); + result->push_back(Symbol::unionAdjustSymbol(j, p)); + result->push_back(Symbol::unionSymbol()); + return result; + } + } + break; + case AVRO_NULL: + case AVRO_BOOL: + case AVRO_INT: + case AVRO_STRING: + case AVRO_BYTES: + case AVRO_ENUM: + case AVRO_ARRAY: + case AVRO_MAP: + case AVRO_RECORD: + break; + default: + throw Exception("Unknown node type"); + } + } + return make_shared<Production>(1, Symbol::error(writer, reader)); +} + +class ResolvingDecoderHandler { + shared_ptr<vector<uint8_t> > defaultData_; + unique_ptr<InputStream> inp_; + DecoderPtr backup_; + DecoderPtr& base_; + const DecoderPtr binDecoder; + public: + ResolvingDecoderHandler(DecoderPtr& base) : base_(base), + binDecoder(binaryDecoder()) { } + size_t handle(const Symbol& s) { + switch (s.kind()) { + case Symbol::sWriterUnion: + return base_->decodeUnionIndex(); + case Symbol::sDefaultStart: + defaultData_ = s.extra<shared_ptr<vector<uint8_t> > >(); + backup_ = base_; + inp_ = memoryInputStream(&(*defaultData_)[0], defaultData_->size()); + base_ = binDecoder; + base_->init(*inp_); + return 0; + case Symbol::sDefaultEnd: + base_= backup_; + backup_.reset(); + return 0; + default: + return 0; + } + } + + void reset() + { + if (backup_ != NULL) + { + base_= backup_; + backup_.reset(); + } + } +}; + +template <typename Parser> +class ResolvingDecoderImpl : public ResolvingDecoder +{ + DecoderPtr base_; + ResolvingDecoderHandler handler_; + Parser parser_; + + void init(InputStream& is); + void decodeNull(); + bool decodeBool(); + int32_t decodeInt(); + int64_t decodeLong(); + float decodeFloat(); + double decodeDouble(); + void decodeString(string& value); + void skipString(); + void decodeBytes(vector<uint8_t>& value); + void skipBytes(); + void decodeFixed(size_t n, vector<uint8_t>& value); + void skipFixed(size_t n); + size_t decodeEnum(); + size_t arrayStart(); + size_t arrayNext(); + size_t skipArray(); + size_t mapStart(); + size_t mapNext(); + size_t skipMap(); + size_t decodeUnionIndex(); + const vector<size_t>& fieldOrder(); + void drain() { + parser_.processImplicitActions(); + base_->drain(); + } +public: + ResolvingDecoderImpl(const ValidSchema& writer, const ValidSchema& reader, + const DecoderPtr& base) : + base_(base), + handler_(base_), + parser_(ResolvingGrammarGenerator().generate(writer, reader), + &(*base_), handler_) + { + } +}; + +template <typename P> +void ResolvingDecoderImpl<P>::init(InputStream& is) +{ + handler_.reset(); + base_->init(is); + parser_.reset(); +} + +template <typename P> +void ResolvingDecoderImpl<P>::decodeNull() +{ + parser_.advance(Symbol::sNull); + base_->decodeNull(); +} + +template <typename P> +bool ResolvingDecoderImpl<P>::decodeBool() +{ + parser_.advance(Symbol::sBool); + return base_->decodeBool(); +} + +template <typename P> +int32_t ResolvingDecoderImpl<P>::decodeInt() +{ + parser_.advance(Symbol::sInt); + return base_->decodeInt(); +} + +template <typename P> +int64_t ResolvingDecoderImpl<P>::decodeLong() +{ + Symbol::Kind k = parser_.advance(Symbol::sLong); + return k == Symbol::sInt ? base_->decodeInt() : base_->decodeLong(); +} + +template <typename P> +float ResolvingDecoderImpl<P>::decodeFloat() +{ + Symbol::Kind k = parser_.advance(Symbol::sFloat); + return k == Symbol::sInt ? base_->decodeInt() : + k == Symbol::sLong ? base_->decodeLong() : + base_->decodeFloat(); +} + +template <typename P> +double ResolvingDecoderImpl<P>::decodeDouble() +{ + Symbol::Kind k = parser_.advance(Symbol::sDouble); + return k == Symbol::sInt ? base_->decodeInt() : + k == Symbol::sLong ? base_->decodeLong() : + k == Symbol::sFloat ? base_->decodeFloat() : + base_->decodeDouble(); +} + +template <typename P> +void ResolvingDecoderImpl<P>::decodeString(string& value) +{ + parser_.advance(Symbol::sString); + base_->decodeString(value); +} + +template <typename P> +void ResolvingDecoderImpl<P>::skipString() +{ + parser_.advance(Symbol::sString); + base_->skipString(); +} + +template <typename P> +void ResolvingDecoderImpl<P>::decodeBytes(vector<uint8_t>& value) +{ + parser_.advance(Symbol::sBytes); + base_->decodeBytes(value); +} + +template <typename P> +void ResolvingDecoderImpl<P>::skipBytes() +{ + parser_.advance(Symbol::sBytes); + base_->skipBytes(); +} + +template <typename P> +void ResolvingDecoderImpl<P>::decodeFixed(size_t n, vector<uint8_t>& value) +{ + parser_.advance(Symbol::sFixed); + parser_.assertSize(n); + return base_->decodeFixed(n, value); +} + +template <typename P> +void ResolvingDecoderImpl<P>::skipFixed(size_t n) +{ + parser_.advance(Symbol::sFixed); + parser_.assertSize(n); + base_->skipFixed(n); +} + +template <typename P> +size_t ResolvingDecoderImpl<P>::decodeEnum() +{ + parser_.advance(Symbol::sEnum); + size_t n = base_->decodeEnum(); + return parser_.enumAdjust(n); +} + +template <typename P> +size_t ResolvingDecoderImpl<P>::arrayStart() +{ + parser_.advance(Symbol::sArrayStart); + size_t result = base_->arrayStart(); + parser_.pushRepeatCount(result); + if (result == 0) { + parser_.popRepeater(); + parser_.advance(Symbol::sArrayEnd); + } + return result; +} + +template <typename P> +size_t ResolvingDecoderImpl<P>::arrayNext() +{ + parser_.processImplicitActions(); + size_t result = base_->arrayNext(); + parser_.nextRepeatCount(result); + if (result == 0) { + parser_.popRepeater(); + parser_.advance(Symbol::sArrayEnd); + } + return result; +} + +template <typename P> +size_t ResolvingDecoderImpl<P>::skipArray() +{ + parser_.advance(Symbol::sArrayStart); + size_t n = base_->skipArray(); + if (n == 0) { + parser_.pop(); + } else { + parser_.pushRepeatCount(n); + parser_.skip(*base_); + } + parser_.advance(Symbol::sArrayEnd); + return 0; +} + +template <typename P> +size_t ResolvingDecoderImpl<P>::mapStart() +{ + parser_.advance(Symbol::sMapStart); + size_t result = base_->mapStart(); + parser_.pushRepeatCount(result); + if (result == 0) { + parser_.popRepeater(); + parser_.advance(Symbol::sMapEnd); + } + return result; +} + +template <typename P> +size_t ResolvingDecoderImpl<P>::mapNext() +{ + parser_.processImplicitActions(); + size_t result = base_->mapNext(); + parser_.nextRepeatCount(result); + if (result == 0) { + parser_.popRepeater(); + parser_.advance(Symbol::sMapEnd); + } + return result; +} + +template <typename P> +size_t ResolvingDecoderImpl<P>::skipMap() +{ + parser_.advance(Symbol::sMapStart); + size_t n = base_->skipMap(); + if (n == 0) { + parser_.pop(); + } else { + parser_.pushRepeatCount(n); + parser_.skip(*base_); + } + parser_.advance(Symbol::sMapEnd); + return 0; +} + +template <typename P> +size_t ResolvingDecoderImpl<P>::decodeUnionIndex() +{ + parser_.advance(Symbol::sUnion); + return parser_.unionAdjust(); +} + +template <typename P> +const vector<size_t>& ResolvingDecoderImpl<P>::fieldOrder() +{ + parser_.advance(Symbol::sRecord); + return parser_.sizeList(); +} + +} // namespace parsing + +ResolvingDecoderPtr resolvingDecoder(const ValidSchema& writer, + const ValidSchema& reader, const DecoderPtr& base) { + return make_shared<parsing::ResolvingDecoderImpl + <parsing::SimpleParser<parsing::ResolvingDecoderHandler> > >( + writer, reader, base); +} + +} // namespace avro + diff --git a/contrib/libs/apache/avro/impl/parsing/Symbol.cc b/contrib/libs/apache/avro/impl/parsing/Symbol.cc index 6eb83309be..b59b965133 100644 --- a/contrib/libs/apache/avro/impl/parsing/Symbol.cc +++ b/contrib/libs/apache/avro/impl/parsing/Symbol.cc @@ -1,111 +1,111 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include "Symbol.hh" - -namespace avro { -namespace parsing { - -using std::vector; -using std::string; -using std::ostringstream; - -const char* Symbol::stringValues[] = { - "TerminalLow", - "Null", - "Bool", - "Int", - "Long", - "Float", - "Double", - "String", - "Bytes", - "ArrayStart", - "ArrayEnd", - "MapStart", - "MapEnd", - "Fixed", - "Enum", - "Union", - "TerminalHigh", - "SizeCheck", - "NameList", - "Root", - "Repeater", - "Alternative", - "Placeholder", - "Indirect", - "Symbolic", - "EnumAdjust", - "UnionAdjust", - "SkipStart", - "Resolve", - "ImplicitActionLow", - "RecordStart", - "RecordEnd", - "Field", - "Record", - "SizeList", - "WriterUnion", - "DefaultStart", - "DefaultEnd", - "ImplicitActionHigh", - "Error" -}; - -Symbol Symbol::enumAdjustSymbol(const NodePtr& writer, const NodePtr& reader) -{ - vector<string> rs; - size_t rc = reader->names(); - for (size_t i = 0; i < rc; ++i) { - rs.push_back(reader->nameAt(i)); - } - - size_t wc = writer->names(); - vector<int> adj; - adj.reserve(wc); - - vector<string> err; - - for (size_t i = 0; i < wc; ++i) { - const string& s = writer->nameAt(i); - vector<string>::const_iterator it = find(rs.begin(), rs.end(), s); - if (it == rs.end()) { - int pos = err.size() + 1; - adj.push_back(-pos); - err.push_back(s); - } else { - adj.push_back(it - rs.begin()); - } - } - return Symbol(sEnumAdjust, make_pair(adj, err)); -} - -Symbol Symbol::error(const NodePtr& writer, const NodePtr& reader) -{ - ostringstream oss; - oss << "Cannot resolve: " << std::endl; - writer->printJson(oss, 0); - oss << std::endl << "with" << std::endl; - reader->printJson(oss, 0); - return Symbol(sError, oss.str()); -} - -} // namespace parsing -} // namespace avro +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include "Symbol.hh" + +namespace avro { +namespace parsing { + +using std::vector; +using std::string; +using std::ostringstream; + +const char* Symbol::stringValues[] = { + "TerminalLow", + "Null", + "Bool", + "Int", + "Long", + "Float", + "Double", + "String", + "Bytes", + "ArrayStart", + "ArrayEnd", + "MapStart", + "MapEnd", + "Fixed", + "Enum", + "Union", + "TerminalHigh", + "SizeCheck", + "NameList", + "Root", + "Repeater", + "Alternative", + "Placeholder", + "Indirect", + "Symbolic", + "EnumAdjust", + "UnionAdjust", + "SkipStart", + "Resolve", + "ImplicitActionLow", + "RecordStart", + "RecordEnd", + "Field", + "Record", + "SizeList", + "WriterUnion", + "DefaultStart", + "DefaultEnd", + "ImplicitActionHigh", + "Error" +}; + +Symbol Symbol::enumAdjustSymbol(const NodePtr& writer, const NodePtr& reader) +{ + vector<string> rs; + size_t rc = reader->names(); + for (size_t i = 0; i < rc; ++i) { + rs.push_back(reader->nameAt(i)); + } + + size_t wc = writer->names(); + vector<int> adj; + adj.reserve(wc); + + vector<string> err; + + for (size_t i = 0; i < wc; ++i) { + const string& s = writer->nameAt(i); + vector<string>::const_iterator it = find(rs.begin(), rs.end(), s); + if (it == rs.end()) { + int pos = err.size() + 1; + adj.push_back(-pos); + err.push_back(s); + } else { + adj.push_back(it - rs.begin()); + } + } + return Symbol(sEnumAdjust, make_pair(adj, err)); +} + +Symbol Symbol::error(const NodePtr& writer, const NodePtr& reader) +{ + ostringstream oss; + oss << "Cannot resolve: " << std::endl; + writer->printJson(oss, 0); + oss << std::endl << "with" << std::endl; + reader->printJson(oss, 0); + return Symbol(sError, oss.str()); +} + +} // namespace parsing +} // namespace avro diff --git a/contrib/libs/apache/avro/impl/parsing/Symbol.hh b/contrib/libs/apache/avro/impl/parsing/Symbol.hh index f4ecfe6e83..d642341e16 100644 --- a/contrib/libs/apache/avro/impl/parsing/Symbol.hh +++ b/contrib/libs/apache/avro/impl/parsing/Symbol.hh @@ -1,854 +1,854 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef avro_parsing_Symbol_hh__ -#define avro_parsing_Symbol_hh__ - -#include <vector> -#include <map> -#include <set> -#include <stack> -#include <sstream> - -#include <boost/any.hpp> -#include <boost/tuple/tuple.hpp> - -#include "Node.hh" -#include "Decoder.hh" -#include "Exception.hh" - -namespace avro { -namespace parsing { - -class Symbol; - -typedef std::vector<Symbol> Production; -typedef std::shared_ptr<Production> ProductionPtr; -typedef boost::tuple<std::stack<ssize_t>, bool, ProductionPtr, ProductionPtr> RepeaterInfo; -typedef boost::tuple<ProductionPtr, ProductionPtr> RootInfo; - -class Symbol { -public: - enum Kind { - sTerminalLow, // extra has nothing - sNull, - sBool, - sInt, - sLong, - sFloat, - sDouble, - sString, - sBytes, - sArrayStart, - sArrayEnd, - sMapStart, - sMapEnd, - sFixed, - sEnum, - sUnion, - sTerminalHigh, - sSizeCheck, // Extra has size - sNameList, // Extra has a vector<string> - sRoot, // Root for a schema, extra is Symbol - sRepeater, // Array or Map, extra is symbol - sAlternative, // One of many (union), extra is Union - sPlaceholder, // To be fixed up later. - sIndirect, // extra is shared_ptr<Production> - sSymbolic, // extra is weal_ptr<Production> - sEnumAdjust, - sUnionAdjust, - sSkipStart, - sResolve, - - sImplicitActionLow, - sRecordStart, - sRecordEnd, - sField, // extra is string - sRecord, - sSizeList, - sWriterUnion, - sDefaultStart, // extra has default value in Avro binary encoding - sDefaultEnd, - sImplicitActionHigh, - sError - }; - -private: - Kind kind_; - boost::any extra_; - - - explicit Symbol(Kind k) : kind_(k) { } - template <typename T> Symbol(Kind k, T t) : kind_(k), extra_(t) { } -public: - - Kind kind() const { - return kind_; - } - - template <typename T> T extra() const { - return boost::any_cast<T>(extra_); - } - - template <typename T> T* extrap() { - return boost::any_cast<T>(&extra_); - } - - template <typename T> const T* extrap() const { - return boost::any_cast<T>(&extra_); - } - - template <typename T> void extra(const T& t) { - extra_ = t; - } - - bool isTerminal() const { - return kind_ > sTerminalLow && kind_ < sTerminalHigh; - } - - bool isImplicitAction() const { - return kind_ > sImplicitActionLow && kind_ < sImplicitActionHigh; - } - - static const char* stringValues[]; - static const char* toString(Kind k) { - return stringValues[k]; - } - - static Symbol rootSymbol(ProductionPtr& s) - { - return Symbol(Symbol::sRoot, RootInfo(s, std::make_shared<Production>())); - } - - static Symbol rootSymbol(const ProductionPtr& main, - const ProductionPtr& backup) - { - return Symbol(Symbol::sRoot, RootInfo(main, backup)); - } - - static Symbol nullSymbol() { - return Symbol(sNull); - } - - static Symbol boolSymbol() { - return Symbol(sBool); - } - - static Symbol intSymbol() { - return Symbol(sInt); - } - - static Symbol longSymbol() { - return Symbol(sLong); - } - - static Symbol floatSymbol() { - return Symbol(sFloat); - } - - static Symbol doubleSymbol() { - return Symbol(sDouble); - } - - static Symbol stringSymbol() { - return Symbol(sString); - } - - static Symbol bytesSymbol() { - return Symbol(sBytes); - } - - static Symbol sizeCheckSymbol(size_t s) { - return Symbol(sSizeCheck, s); - } - - static Symbol fixedSymbol() { - return Symbol(sFixed); - } - - static Symbol enumSymbol() { - return Symbol(sEnum); - } - - static Symbol arrayStartSymbol() { - return Symbol(sArrayStart); - } - - static Symbol arrayEndSymbol() { - return Symbol(sArrayEnd); - } - - static Symbol mapStartSymbol() { - return Symbol(sMapStart); - } - - static Symbol mapEndSymbol() { - return Symbol(sMapEnd); - } - - static Symbol repeater(const ProductionPtr& p, - bool isArray) { - return repeater(p, p, isArray); - } - - static Symbol repeater(const ProductionPtr& read, - const ProductionPtr& skip, - bool isArray) { - std::stack<ssize_t> s; - return Symbol(sRepeater, RepeaterInfo(s, isArray, read, skip)); - } - - static Symbol defaultStartAction(std::shared_ptr<std::vector<uint8_t> > bb) - { - return Symbol(sDefaultStart, bb); - } - - static Symbol defaultEndAction() - { - return Symbol(sDefaultEnd); - } - - static Symbol alternative( - const std::vector<ProductionPtr>& branches) - { - return Symbol(Symbol::sAlternative, branches); - } - - static Symbol unionSymbol() { - return Symbol(sUnion); - } - - static Symbol recordStartSymbol() { - return Symbol(sRecordStart); - } - - static Symbol recordEndSymbol() { - return Symbol(sRecordEnd); - } - - static Symbol fieldSymbol(const std::string& name) { - return Symbol(sField, name); - } - - static Symbol writerUnionAction() { - return Symbol(sWriterUnion); - } - - static Symbol nameListSymbol( - const std::vector<std::string>& v) { - return Symbol(sNameList, v); - } - - template <typename T> - static Symbol placeholder(const T& n) { - return Symbol(sPlaceholder, n); - } - - static Symbol indirect(const ProductionPtr& p) { - return Symbol(sIndirect, p); - } - - static Symbol symbolic(const std::weak_ptr<Production>& p) { - return Symbol(sSymbolic, p); - } - - static Symbol enumAdjustSymbol(const NodePtr& writer, - const NodePtr& reader); - - static Symbol unionAdjustSymbol(size_t branch, - const ProductionPtr& p) { - return Symbol(sUnionAdjust, std::make_pair(branch, p)); - } - - static Symbol sizeListAction(std::vector<size_t> order) { - return Symbol(sSizeList, order); - } - - static Symbol recordAction() { - return Symbol(sRecord); - } - - static Symbol error(const NodePtr& writer, const NodePtr& reader); - - static Symbol resolveSymbol(Kind w, Kind r) { - return Symbol(sResolve, std::make_pair(w, r)); - } - - static Symbol skipStart() { - return Symbol(sSkipStart); - } - -}; - -/** - * Recursively replaces all placeholders in the production with the - * corresponding values. - */ -template<typename T> -void fixup(const ProductionPtr& p, - const std::map<T, ProductionPtr> &m) -{ - std::set<ProductionPtr> seen; - for (Production::iterator it = p->begin(); it != p->end(); ++it) { - fixup(*it, m, seen); - } -} - - -/** - * Recursively replaces all placeholders in the symbol with the values with the - * corresponding values. - */ -template<typename T> -void fixup_internal(const ProductionPtr& p, - const std::map<T, ProductionPtr> &m, - std::set<ProductionPtr>& seen) -{ - if (seen.find(p) == seen.end()) { - seen.insert(p); - for (Production::iterator it = p->begin(); it != p->end(); ++it) { - fixup(*it, m, seen); - } - } -} - -template<typename T> -void fixup(Symbol& s, const std::map<T, ProductionPtr> &m, - std::set<ProductionPtr>& seen) -{ - switch (s.kind()) { - case Symbol::sIndirect: - fixup_internal(s.extra<ProductionPtr>(), m, seen); - break; - case Symbol::sAlternative: - { - const std::vector<ProductionPtr> *vv = - s.extrap<std::vector<ProductionPtr> >(); - for (std::vector<ProductionPtr>::const_iterator it = vv->begin(); - it != vv->end(); ++it) { - fixup_internal(*it, m, seen); - } - } - break; - case Symbol::sRepeater: - { - const RepeaterInfo& ri = *s.extrap<RepeaterInfo>(); - fixup_internal(boost::tuples::get<2>(ri), m, seen); - fixup_internal(boost::tuples::get<3>(ri), m, seen); - } - break; - case Symbol::sPlaceholder: - { - typename std::map<T, std::shared_ptr<Production> >::const_iterator it = - m.find(s.extra<T>()); - if (it == m.end()) { - throw Exception("Placeholder symbol cannot be resolved"); - } - s = Symbol::symbolic(std::weak_ptr<Production>(it->second)); - } - break; - case Symbol::sUnionAdjust: - fixup_internal(s.extrap<std::pair<size_t, ProductionPtr> >()->second, - m, seen); - break; - default: - break; - } -} - -template<typename Handler> -class SimpleParser { - Decoder* decoder_; - Handler& handler_; - std::stack<Symbol> parsingStack; - - static void throwMismatch(Symbol::Kind actual, Symbol::Kind expected) - { - std::ostringstream oss; - oss << "Invalid operation. Schema requires: " << - Symbol::toString(expected) << ", got: " << - Symbol::toString(actual); - throw Exception(oss.str()); - } - - static void assertMatch(Symbol::Kind actual, Symbol::Kind expected) - { - if (expected != actual) { - throwMismatch(actual, expected); - } - - } - - void append(const ProductionPtr& ss) { - for (Production::const_iterator it = ss->begin(); - it != ss->end(); ++it) { - parsingStack.push(*it); - } - } - - size_t popSize() { - const Symbol& s = parsingStack.top(); - assertMatch(Symbol::sSizeCheck, s.kind()); - size_t result = s.extra<size_t>(); - parsingStack.pop(); - return result; - } - - static void assertLessThan(size_t n, size_t s) { - if (n >= s) { - std::ostringstream oss; - oss << "Size max value. Upper bound: " << s << " found " << n; - throw Exception(oss.str()); - } - } - -public: - Symbol::Kind advance(Symbol::Kind k) { - for (; ;) { - Symbol& s = parsingStack.top(); -// std::cout << "advance: " << Symbol::toString(s.kind()) -// << " looking for " << Symbol::toString(k) << '\n'; - if (s.kind() == k) { - parsingStack.pop(); - return k; - } else if (s.isTerminal()) { - throwMismatch(k, s.kind()); - } else { - switch (s.kind()) { - case Symbol::sRoot: - append(boost::tuples::get<0>(*s.extrap<RootInfo>())); - continue; - case Symbol::sIndirect: - { - ProductionPtr pp = - s.extra<ProductionPtr>(); - parsingStack.pop(); - append(pp); - } - continue; - case Symbol::sSymbolic: - { - ProductionPtr pp( - s.extra<std::weak_ptr<Production> >()); - parsingStack.pop(); - append(pp); - } - continue; - case Symbol::sRepeater: - { - RepeaterInfo *p = s.extrap<RepeaterInfo>(); - std::stack<ssize_t>& ns = boost::tuples::get<0>(*p); - if (ns.empty()) { - throw Exception( - "Empty item count stack in repeater advance"); - } - if (ns.top() == 0) { - throw Exception( - "Zero item count in repeater advance"); - } - --ns.top(); - append(boost::tuples::get<2>(*p)); - } - continue; - case Symbol::sError: - throw Exception(s.extra<std::string>()); - case Symbol::sResolve: - { - const std::pair<Symbol::Kind, Symbol::Kind>* p = - s.extrap<std::pair<Symbol::Kind, Symbol::Kind> >(); - assertMatch(p->second, k); - Symbol::Kind result = p->first; - parsingStack.pop(); - return result; - } - case Symbol::sSkipStart: - parsingStack.pop(); - skip(*decoder_); - break; - default: - if (s.isImplicitAction()) { - size_t n = handler_.handle(s); - if (s.kind() == Symbol::sWriterUnion) { - parsingStack.pop(); - selectBranch(n); - } else { - parsingStack.pop(); - } - } else { - std::ostringstream oss; - oss << "Encountered " << Symbol::toString(s.kind()) - << " while looking for " << Symbol::toString(k); - throw Exception(oss.str()); - } - } - } - } - } - - void skip(Decoder& d) { - const size_t sz = parsingStack.size(); - if (sz == 0) { - throw Exception("Nothing to skip!"); - } - while (parsingStack.size() >= sz) { - Symbol& t = parsingStack.top(); - // std::cout << "skip: " << Symbol::toString(t.kind()) << '\n'; - switch (t.kind()) { - case Symbol::sNull: - d.decodeNull(); - break; - case Symbol::sBool: - d.decodeBool(); - break; - case Symbol::sInt: - d.decodeInt(); - break; - case Symbol::sLong: - d.decodeLong(); - break; - case Symbol::sFloat: - d.decodeFloat(); - break; - case Symbol::sDouble: - d.decodeDouble(); - break; - case Symbol::sString: - d.skipString(); - break; - case Symbol::sBytes: - d.skipBytes(); - break; - case Symbol::sArrayStart: - { - parsingStack.pop(); - size_t n = d.skipArray(); - processImplicitActions(); - assertMatch(Symbol::sRepeater, parsingStack.top().kind()); - if (n == 0) { - break; - } - Symbol& t = parsingStack.top(); - RepeaterInfo *p = t.extrap<RepeaterInfo>(); - boost::tuples::get<0>(*p).push(n); - continue; - } - case Symbol::sArrayEnd: - break; - case Symbol::sMapStart: - { - parsingStack.pop(); - size_t n = d.skipMap(); - processImplicitActions(); - assertMatch(Symbol::sRepeater, parsingStack.top().kind()); - if (n == 0) { - break; - } - Symbol& t = parsingStack.top(); - RepeaterInfo *p = t.extrap<RepeaterInfo>(); - boost::tuples::get<0>(*p).push(n); - continue; - } - case Symbol::sMapEnd: - break; - case Symbol::sFixed: - { - parsingStack.pop(); - Symbol& t = parsingStack.top(); - d.decodeFixed(t.extra<size_t>()); - } - break; - case Symbol::sEnum: - parsingStack.pop(); - d.decodeEnum(); - break; - case Symbol::sUnion: - { - parsingStack.pop(); - size_t n = d.decodeUnionIndex(); - selectBranch(n); - continue; - } - case Symbol::sRepeater: - { - RepeaterInfo *p = t.extrap<RepeaterInfo>(); - std::stack<ssize_t>& ns = boost::tuples::get<0>(*p); - if (ns.empty()) { - throw Exception( - "Empty item count stack in repeater skip"); - } - ssize_t& n = ns.top(); - if (n == 0) { - n = boost::tuples::get<1>(*p) ? d.arrayNext() - : d.mapNext(); - } - if (n != 0) { - --n; - append(boost::tuples::get<3>(*p)); - continue; - } else { - ns.pop(); - } - } - break; - case Symbol::sIndirect: - { - ProductionPtr pp = - t.extra<ProductionPtr>(); - parsingStack.pop(); - append(pp); - } - continue; - case Symbol::sSymbolic: - { - ProductionPtr pp( - t.extra<std::weak_ptr<Production> >()); - parsingStack.pop(); - append(pp); - } - continue; - default: - { - std::ostringstream oss; - oss << "Don't know how to skip " - << Symbol::toString(t.kind()); - throw Exception(oss.str()); - } - } - parsingStack.pop(); - } - } - - void assertSize(size_t n) { - size_t s = popSize(); - if (s != n) { - std::ostringstream oss; - oss << "Incorrect size. Expected: " << s << " found " << n; - throw Exception(oss.str()); - } - } - - void assertLessThanSize(size_t n) { - assertLessThan(n, popSize()); - } - - size_t enumAdjust(size_t n) { - const Symbol& s = parsingStack.top(); - assertMatch(Symbol::sEnumAdjust, s.kind()); - const std::pair<std::vector<int>, std::vector<std::string> >* v = - s.extrap<std::pair<std::vector<int>, std::vector<std::string> > >(); - assertLessThan(n, v->first.size()); - - int result = v->first[n]; - if (result < 0) { - std::ostringstream oss; - oss << "Cannot resolve symbol: " << v->second[-result - 1] - << std::endl; - throw Exception(oss.str()); - } - parsingStack.pop(); - return result; - } - - size_t unionAdjust() { - const Symbol& s = parsingStack.top(); - assertMatch(Symbol::sUnionAdjust, s.kind()); - std::pair<size_t, ProductionPtr> p = - s.extra<std::pair<size_t, ProductionPtr> >(); - parsingStack.pop(); - append(p.second); - return p.first; - } - - std::string nameForIndex(size_t e) { - const Symbol& s = parsingStack.top(); - assertMatch(Symbol::sNameList, s.kind()); - const std::vector<std::string> names = - s.extra<std::vector<std::string> >(); - if (e >= names.size()) { - throw Exception("Not that many names"); - } - std::string result = names[e]; - parsingStack.pop(); - return result; - } - - size_t indexForName(const std::string &name) { - const Symbol& s = parsingStack.top(); - assertMatch(Symbol::sNameList, s.kind()); - const std::vector<std::string> names = - s.extra<std::vector<std::string> >(); - std::vector<std::string>::const_iterator it = - std::find(names.begin(), names.end(), name); - if (it == names.end()) { - throw Exception("No such enum symbol"); - } - size_t result = it - names.begin(); - parsingStack.pop(); - return result; - } - - void pushRepeatCount(size_t n) { - processImplicitActions(); - Symbol& s = parsingStack.top(); - assertMatch(Symbol::sRepeater, s.kind()); - RepeaterInfo *p = s.extrap<RepeaterInfo>(); - std::stack<ssize_t> &nn = boost::tuples::get<0>(*p); - nn.push(n); - } - - void nextRepeatCount(size_t n) { - processImplicitActions(); - Symbol& s = parsingStack.top(); - assertMatch(Symbol::sRepeater, s.kind()); - RepeaterInfo *p = s.extrap<RepeaterInfo>(); - std::stack<ssize_t> &nn = boost::tuples::get<0>(*p); - if (nn.empty() || nn.top() != 0) { - throw Exception("Wrong number of items"); - } - nn.top() = n; - } - - void popRepeater() { - processImplicitActions(); - Symbol& s = parsingStack.top(); - assertMatch(Symbol::sRepeater, s.kind()); - RepeaterInfo *p = s.extrap<RepeaterInfo>(); - std::stack<ssize_t> &ns = boost::tuples::get<0>(*p); - if (ns.empty()) { - throw Exception("Incorrect number of items (empty)"); - } - if (ns.top() > 0) { - throw Exception("Incorrect number of items (non-zero)"); - } - ns.pop(); - parsingStack.pop(); - } - - void selectBranch(size_t n) { - const Symbol& s = parsingStack.top(); - assertMatch(Symbol::sAlternative, s.kind()); - std::vector<ProductionPtr> v = - s.extra<std::vector<ProductionPtr> >(); - if (n >= v.size()) { - throw Exception("Not that many branches"); - } - parsingStack.pop(); - append(v[n]); - } - - const std::vector<size_t>& sizeList() { - const Symbol& s = parsingStack.top(); - assertMatch(Symbol::sSizeList, s.kind()); - return *s.extrap<std::vector<size_t> >(); - } - - Symbol::Kind top() const { - return parsingStack.top().kind(); - } - - void pop() { - parsingStack.pop(); - } - - void processImplicitActions() { - for (; ;) { - Symbol& s = parsingStack.top(); - if (s.isImplicitAction()) { - handler_.handle(s); - parsingStack.pop(); - } else if (s.kind() == Symbol::sSkipStart) { - parsingStack.pop(); - skip(*decoder_); - } else { - break; - } - } - } - - SimpleParser(const Symbol& s, Decoder* d, Handler& h) : - decoder_(d), handler_(h) { - parsingStack.push(s); - } - - void reset() { - while (parsingStack.size() > 1) { - parsingStack.pop(); - } - } - -}; - -inline std::ostream& operator<<(std::ostream& os, const Symbol s); - -inline std::ostream& operator<<(std::ostream& os, const Production p) -{ - os << '('; - for (Production::const_iterator it = p.begin(); it != p.end(); ++it) { - os << *it << ", "; - } - os << ')'; - return os; -} - -inline std::ostream& operator<<(std::ostream& os, const Symbol s) -{ - switch (s.kind()) { - case Symbol::sRepeater: - { - const RepeaterInfo& ri = *s.extrap<RepeaterInfo>(); - os << '(' << Symbol::toString(s.kind()) - << ' ' << *boost::tuples::get<2>(ri) - << ' ' << *boost::tuples::get<3>(ri) - << ')'; - } - break; - case Symbol::sIndirect: - { - os << '(' << Symbol::toString(s.kind()) << ' ' - << *s.extra<std::shared_ptr<Production> >() << ')'; - } - break; - case Symbol::sAlternative: - { - os << '(' << Symbol::toString(s.kind()); - for (std::vector<ProductionPtr>::const_iterator it = - s.extrap<std::vector<ProductionPtr> >()->begin(); - it != s.extrap<std::vector<ProductionPtr> >()->end(); - ++it) { - os << ' ' << **it; - } - os << ')'; - } - break; - case Symbol::sSymbolic: - { - os << '(' << Symbol::toString(s.kind()) - << ' ' << s.extra<std::weak_ptr<Production> >().lock() - << ')'; - } - break; - default: - os << Symbol::toString(s.kind()); - break; - } - return os; - } -} // namespace parsing -} // namespace avro - -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_parsing_Symbol_hh__ +#define avro_parsing_Symbol_hh__ + +#include <vector> +#include <map> +#include <set> +#include <stack> +#include <sstream> + +#include <boost/any.hpp> +#include <boost/tuple/tuple.hpp> + +#include "Node.hh" +#include "Decoder.hh" +#include "Exception.hh" + +namespace avro { +namespace parsing { + +class Symbol; + +typedef std::vector<Symbol> Production; +typedef std::shared_ptr<Production> ProductionPtr; +typedef boost::tuple<std::stack<ssize_t>, bool, ProductionPtr, ProductionPtr> RepeaterInfo; +typedef boost::tuple<ProductionPtr, ProductionPtr> RootInfo; + +class Symbol { +public: + enum Kind { + sTerminalLow, // extra has nothing + sNull, + sBool, + sInt, + sLong, + sFloat, + sDouble, + sString, + sBytes, + sArrayStart, + sArrayEnd, + sMapStart, + sMapEnd, + sFixed, + sEnum, + sUnion, + sTerminalHigh, + sSizeCheck, // Extra has size + sNameList, // Extra has a vector<string> + sRoot, // Root for a schema, extra is Symbol + sRepeater, // Array or Map, extra is symbol + sAlternative, // One of many (union), extra is Union + sPlaceholder, // To be fixed up later. + sIndirect, // extra is shared_ptr<Production> + sSymbolic, // extra is weal_ptr<Production> + sEnumAdjust, + sUnionAdjust, + sSkipStart, + sResolve, + + sImplicitActionLow, + sRecordStart, + sRecordEnd, + sField, // extra is string + sRecord, + sSizeList, + sWriterUnion, + sDefaultStart, // extra has default value in Avro binary encoding + sDefaultEnd, + sImplicitActionHigh, + sError + }; + +private: + Kind kind_; + boost::any extra_; + + + explicit Symbol(Kind k) : kind_(k) { } + template <typename T> Symbol(Kind k, T t) : kind_(k), extra_(t) { } +public: + + Kind kind() const { + return kind_; + } + + template <typename T> T extra() const { + return boost::any_cast<T>(extra_); + } + + template <typename T> T* extrap() { + return boost::any_cast<T>(&extra_); + } + + template <typename T> const T* extrap() const { + return boost::any_cast<T>(&extra_); + } + + template <typename T> void extra(const T& t) { + extra_ = t; + } + + bool isTerminal() const { + return kind_ > sTerminalLow && kind_ < sTerminalHigh; + } + + bool isImplicitAction() const { + return kind_ > sImplicitActionLow && kind_ < sImplicitActionHigh; + } + + static const char* stringValues[]; + static const char* toString(Kind k) { + return stringValues[k]; + } + + static Symbol rootSymbol(ProductionPtr& s) + { + return Symbol(Symbol::sRoot, RootInfo(s, std::make_shared<Production>())); + } + + static Symbol rootSymbol(const ProductionPtr& main, + const ProductionPtr& backup) + { + return Symbol(Symbol::sRoot, RootInfo(main, backup)); + } + + static Symbol nullSymbol() { + return Symbol(sNull); + } + + static Symbol boolSymbol() { + return Symbol(sBool); + } + + static Symbol intSymbol() { + return Symbol(sInt); + } + + static Symbol longSymbol() { + return Symbol(sLong); + } + + static Symbol floatSymbol() { + return Symbol(sFloat); + } + + static Symbol doubleSymbol() { + return Symbol(sDouble); + } + + static Symbol stringSymbol() { + return Symbol(sString); + } + + static Symbol bytesSymbol() { + return Symbol(sBytes); + } + + static Symbol sizeCheckSymbol(size_t s) { + return Symbol(sSizeCheck, s); + } + + static Symbol fixedSymbol() { + return Symbol(sFixed); + } + + static Symbol enumSymbol() { + return Symbol(sEnum); + } + + static Symbol arrayStartSymbol() { + return Symbol(sArrayStart); + } + + static Symbol arrayEndSymbol() { + return Symbol(sArrayEnd); + } + + static Symbol mapStartSymbol() { + return Symbol(sMapStart); + } + + static Symbol mapEndSymbol() { + return Symbol(sMapEnd); + } + + static Symbol repeater(const ProductionPtr& p, + bool isArray) { + return repeater(p, p, isArray); + } + + static Symbol repeater(const ProductionPtr& read, + const ProductionPtr& skip, + bool isArray) { + std::stack<ssize_t> s; + return Symbol(sRepeater, RepeaterInfo(s, isArray, read, skip)); + } + + static Symbol defaultStartAction(std::shared_ptr<std::vector<uint8_t> > bb) + { + return Symbol(sDefaultStart, bb); + } + + static Symbol defaultEndAction() + { + return Symbol(sDefaultEnd); + } + + static Symbol alternative( + const std::vector<ProductionPtr>& branches) + { + return Symbol(Symbol::sAlternative, branches); + } + + static Symbol unionSymbol() { + return Symbol(sUnion); + } + + static Symbol recordStartSymbol() { + return Symbol(sRecordStart); + } + + static Symbol recordEndSymbol() { + return Symbol(sRecordEnd); + } + + static Symbol fieldSymbol(const std::string& name) { + return Symbol(sField, name); + } + + static Symbol writerUnionAction() { + return Symbol(sWriterUnion); + } + + static Symbol nameListSymbol( + const std::vector<std::string>& v) { + return Symbol(sNameList, v); + } + + template <typename T> + static Symbol placeholder(const T& n) { + return Symbol(sPlaceholder, n); + } + + static Symbol indirect(const ProductionPtr& p) { + return Symbol(sIndirect, p); + } + + static Symbol symbolic(const std::weak_ptr<Production>& p) { + return Symbol(sSymbolic, p); + } + + static Symbol enumAdjustSymbol(const NodePtr& writer, + const NodePtr& reader); + + static Symbol unionAdjustSymbol(size_t branch, + const ProductionPtr& p) { + return Symbol(sUnionAdjust, std::make_pair(branch, p)); + } + + static Symbol sizeListAction(std::vector<size_t> order) { + return Symbol(sSizeList, order); + } + + static Symbol recordAction() { + return Symbol(sRecord); + } + + static Symbol error(const NodePtr& writer, const NodePtr& reader); + + static Symbol resolveSymbol(Kind w, Kind r) { + return Symbol(sResolve, std::make_pair(w, r)); + } + + static Symbol skipStart() { + return Symbol(sSkipStart); + } + +}; + +/** + * Recursively replaces all placeholders in the production with the + * corresponding values. + */ +template<typename T> +void fixup(const ProductionPtr& p, + const std::map<T, ProductionPtr> &m) +{ + std::set<ProductionPtr> seen; + for (Production::iterator it = p->begin(); it != p->end(); ++it) { + fixup(*it, m, seen); + } +} + + +/** + * Recursively replaces all placeholders in the symbol with the values with the + * corresponding values. + */ +template<typename T> +void fixup_internal(const ProductionPtr& p, + const std::map<T, ProductionPtr> &m, + std::set<ProductionPtr>& seen) +{ + if (seen.find(p) == seen.end()) { + seen.insert(p); + for (Production::iterator it = p->begin(); it != p->end(); ++it) { + fixup(*it, m, seen); + } + } +} + +template<typename T> +void fixup(Symbol& s, const std::map<T, ProductionPtr> &m, + std::set<ProductionPtr>& seen) +{ + switch (s.kind()) { + case Symbol::sIndirect: + fixup_internal(s.extra<ProductionPtr>(), m, seen); + break; + case Symbol::sAlternative: + { + const std::vector<ProductionPtr> *vv = + s.extrap<std::vector<ProductionPtr> >(); + for (std::vector<ProductionPtr>::const_iterator it = vv->begin(); + it != vv->end(); ++it) { + fixup_internal(*it, m, seen); + } + } + break; + case Symbol::sRepeater: + { + const RepeaterInfo& ri = *s.extrap<RepeaterInfo>(); + fixup_internal(boost::tuples::get<2>(ri), m, seen); + fixup_internal(boost::tuples::get<3>(ri), m, seen); + } + break; + case Symbol::sPlaceholder: + { + typename std::map<T, std::shared_ptr<Production> >::const_iterator it = + m.find(s.extra<T>()); + if (it == m.end()) { + throw Exception("Placeholder symbol cannot be resolved"); + } + s = Symbol::symbolic(std::weak_ptr<Production>(it->second)); + } + break; + case Symbol::sUnionAdjust: + fixup_internal(s.extrap<std::pair<size_t, ProductionPtr> >()->second, + m, seen); + break; + default: + break; + } +} + +template<typename Handler> +class SimpleParser { + Decoder* decoder_; + Handler& handler_; + std::stack<Symbol> parsingStack; + + static void throwMismatch(Symbol::Kind actual, Symbol::Kind expected) + { + std::ostringstream oss; + oss << "Invalid operation. Schema requires: " << + Symbol::toString(expected) << ", got: " << + Symbol::toString(actual); + throw Exception(oss.str()); + } + + static void assertMatch(Symbol::Kind actual, Symbol::Kind expected) + { + if (expected != actual) { + throwMismatch(actual, expected); + } + + } + + void append(const ProductionPtr& ss) { + for (Production::const_iterator it = ss->begin(); + it != ss->end(); ++it) { + parsingStack.push(*it); + } + } + + size_t popSize() { + const Symbol& s = parsingStack.top(); + assertMatch(Symbol::sSizeCheck, s.kind()); + size_t result = s.extra<size_t>(); + parsingStack.pop(); + return result; + } + + static void assertLessThan(size_t n, size_t s) { + if (n >= s) { + std::ostringstream oss; + oss << "Size max value. Upper bound: " << s << " found " << n; + throw Exception(oss.str()); + } + } + +public: + Symbol::Kind advance(Symbol::Kind k) { + for (; ;) { + Symbol& s = parsingStack.top(); +// std::cout << "advance: " << Symbol::toString(s.kind()) +// << " looking for " << Symbol::toString(k) << '\n'; + if (s.kind() == k) { + parsingStack.pop(); + return k; + } else if (s.isTerminal()) { + throwMismatch(k, s.kind()); + } else { + switch (s.kind()) { + case Symbol::sRoot: + append(boost::tuples::get<0>(*s.extrap<RootInfo>())); + continue; + case Symbol::sIndirect: + { + ProductionPtr pp = + s.extra<ProductionPtr>(); + parsingStack.pop(); + append(pp); + } + continue; + case Symbol::sSymbolic: + { + ProductionPtr pp( + s.extra<std::weak_ptr<Production> >()); + parsingStack.pop(); + append(pp); + } + continue; + case Symbol::sRepeater: + { + RepeaterInfo *p = s.extrap<RepeaterInfo>(); + std::stack<ssize_t>& ns = boost::tuples::get<0>(*p); + if (ns.empty()) { + throw Exception( + "Empty item count stack in repeater advance"); + } + if (ns.top() == 0) { + throw Exception( + "Zero item count in repeater advance"); + } + --ns.top(); + append(boost::tuples::get<2>(*p)); + } + continue; + case Symbol::sError: + throw Exception(s.extra<std::string>()); + case Symbol::sResolve: + { + const std::pair<Symbol::Kind, Symbol::Kind>* p = + s.extrap<std::pair<Symbol::Kind, Symbol::Kind> >(); + assertMatch(p->second, k); + Symbol::Kind result = p->first; + parsingStack.pop(); + return result; + } + case Symbol::sSkipStart: + parsingStack.pop(); + skip(*decoder_); + break; + default: + if (s.isImplicitAction()) { + size_t n = handler_.handle(s); + if (s.kind() == Symbol::sWriterUnion) { + parsingStack.pop(); + selectBranch(n); + } else { + parsingStack.pop(); + } + } else { + std::ostringstream oss; + oss << "Encountered " << Symbol::toString(s.kind()) + << " while looking for " << Symbol::toString(k); + throw Exception(oss.str()); + } + } + } + } + } + + void skip(Decoder& d) { + const size_t sz = parsingStack.size(); + if (sz == 0) { + throw Exception("Nothing to skip!"); + } + while (parsingStack.size() >= sz) { + Symbol& t = parsingStack.top(); + // std::cout << "skip: " << Symbol::toString(t.kind()) << '\n'; + switch (t.kind()) { + case Symbol::sNull: + d.decodeNull(); + break; + case Symbol::sBool: + d.decodeBool(); + break; + case Symbol::sInt: + d.decodeInt(); + break; + case Symbol::sLong: + d.decodeLong(); + break; + case Symbol::sFloat: + d.decodeFloat(); + break; + case Symbol::sDouble: + d.decodeDouble(); + break; + case Symbol::sString: + d.skipString(); + break; + case Symbol::sBytes: + d.skipBytes(); + break; + case Symbol::sArrayStart: + { + parsingStack.pop(); + size_t n = d.skipArray(); + processImplicitActions(); + assertMatch(Symbol::sRepeater, parsingStack.top().kind()); + if (n == 0) { + break; + } + Symbol& t = parsingStack.top(); + RepeaterInfo *p = t.extrap<RepeaterInfo>(); + boost::tuples::get<0>(*p).push(n); + continue; + } + case Symbol::sArrayEnd: + break; + case Symbol::sMapStart: + { + parsingStack.pop(); + size_t n = d.skipMap(); + processImplicitActions(); + assertMatch(Symbol::sRepeater, parsingStack.top().kind()); + if (n == 0) { + break; + } + Symbol& t = parsingStack.top(); + RepeaterInfo *p = t.extrap<RepeaterInfo>(); + boost::tuples::get<0>(*p).push(n); + continue; + } + case Symbol::sMapEnd: + break; + case Symbol::sFixed: + { + parsingStack.pop(); + Symbol& t = parsingStack.top(); + d.decodeFixed(t.extra<size_t>()); + } + break; + case Symbol::sEnum: + parsingStack.pop(); + d.decodeEnum(); + break; + case Symbol::sUnion: + { + parsingStack.pop(); + size_t n = d.decodeUnionIndex(); + selectBranch(n); + continue; + } + case Symbol::sRepeater: + { + RepeaterInfo *p = t.extrap<RepeaterInfo>(); + std::stack<ssize_t>& ns = boost::tuples::get<0>(*p); + if (ns.empty()) { + throw Exception( + "Empty item count stack in repeater skip"); + } + ssize_t& n = ns.top(); + if (n == 0) { + n = boost::tuples::get<1>(*p) ? d.arrayNext() + : d.mapNext(); + } + if (n != 0) { + --n; + append(boost::tuples::get<3>(*p)); + continue; + } else { + ns.pop(); + } + } + break; + case Symbol::sIndirect: + { + ProductionPtr pp = + t.extra<ProductionPtr>(); + parsingStack.pop(); + append(pp); + } + continue; + case Symbol::sSymbolic: + { + ProductionPtr pp( + t.extra<std::weak_ptr<Production> >()); + parsingStack.pop(); + append(pp); + } + continue; + default: + { + std::ostringstream oss; + oss << "Don't know how to skip " + << Symbol::toString(t.kind()); + throw Exception(oss.str()); + } + } + parsingStack.pop(); + } + } + + void assertSize(size_t n) { + size_t s = popSize(); + if (s != n) { + std::ostringstream oss; + oss << "Incorrect size. Expected: " << s << " found " << n; + throw Exception(oss.str()); + } + } + + void assertLessThanSize(size_t n) { + assertLessThan(n, popSize()); + } + + size_t enumAdjust(size_t n) { + const Symbol& s = parsingStack.top(); + assertMatch(Symbol::sEnumAdjust, s.kind()); + const std::pair<std::vector<int>, std::vector<std::string> >* v = + s.extrap<std::pair<std::vector<int>, std::vector<std::string> > >(); + assertLessThan(n, v->first.size()); + + int result = v->first[n]; + if (result < 0) { + std::ostringstream oss; + oss << "Cannot resolve symbol: " << v->second[-result - 1] + << std::endl; + throw Exception(oss.str()); + } + parsingStack.pop(); + return result; + } + + size_t unionAdjust() { + const Symbol& s = parsingStack.top(); + assertMatch(Symbol::sUnionAdjust, s.kind()); + std::pair<size_t, ProductionPtr> p = + s.extra<std::pair<size_t, ProductionPtr> >(); + parsingStack.pop(); + append(p.second); + return p.first; + } + + std::string nameForIndex(size_t e) { + const Symbol& s = parsingStack.top(); + assertMatch(Symbol::sNameList, s.kind()); + const std::vector<std::string> names = + s.extra<std::vector<std::string> >(); + if (e >= names.size()) { + throw Exception("Not that many names"); + } + std::string result = names[e]; + parsingStack.pop(); + return result; + } + + size_t indexForName(const std::string &name) { + const Symbol& s = parsingStack.top(); + assertMatch(Symbol::sNameList, s.kind()); + const std::vector<std::string> names = + s.extra<std::vector<std::string> >(); + std::vector<std::string>::const_iterator it = + std::find(names.begin(), names.end(), name); + if (it == names.end()) { + throw Exception("No such enum symbol"); + } + size_t result = it - names.begin(); + parsingStack.pop(); + return result; + } + + void pushRepeatCount(size_t n) { + processImplicitActions(); + Symbol& s = parsingStack.top(); + assertMatch(Symbol::sRepeater, s.kind()); + RepeaterInfo *p = s.extrap<RepeaterInfo>(); + std::stack<ssize_t> &nn = boost::tuples::get<0>(*p); + nn.push(n); + } + + void nextRepeatCount(size_t n) { + processImplicitActions(); + Symbol& s = parsingStack.top(); + assertMatch(Symbol::sRepeater, s.kind()); + RepeaterInfo *p = s.extrap<RepeaterInfo>(); + std::stack<ssize_t> &nn = boost::tuples::get<0>(*p); + if (nn.empty() || nn.top() != 0) { + throw Exception("Wrong number of items"); + } + nn.top() = n; + } + + void popRepeater() { + processImplicitActions(); + Symbol& s = parsingStack.top(); + assertMatch(Symbol::sRepeater, s.kind()); + RepeaterInfo *p = s.extrap<RepeaterInfo>(); + std::stack<ssize_t> &ns = boost::tuples::get<0>(*p); + if (ns.empty()) { + throw Exception("Incorrect number of items (empty)"); + } + if (ns.top() > 0) { + throw Exception("Incorrect number of items (non-zero)"); + } + ns.pop(); + parsingStack.pop(); + } + + void selectBranch(size_t n) { + const Symbol& s = parsingStack.top(); + assertMatch(Symbol::sAlternative, s.kind()); + std::vector<ProductionPtr> v = + s.extra<std::vector<ProductionPtr> >(); + if (n >= v.size()) { + throw Exception("Not that many branches"); + } + parsingStack.pop(); + append(v[n]); + } + + const std::vector<size_t>& sizeList() { + const Symbol& s = parsingStack.top(); + assertMatch(Symbol::sSizeList, s.kind()); + return *s.extrap<std::vector<size_t> >(); + } + + Symbol::Kind top() const { + return parsingStack.top().kind(); + } + + void pop() { + parsingStack.pop(); + } + + void processImplicitActions() { + for (; ;) { + Symbol& s = parsingStack.top(); + if (s.isImplicitAction()) { + handler_.handle(s); + parsingStack.pop(); + } else if (s.kind() == Symbol::sSkipStart) { + parsingStack.pop(); + skip(*decoder_); + } else { + break; + } + } + } + + SimpleParser(const Symbol& s, Decoder* d, Handler& h) : + decoder_(d), handler_(h) { + parsingStack.push(s); + } + + void reset() { + while (parsingStack.size() > 1) { + parsingStack.pop(); + } + } + +}; + +inline std::ostream& operator<<(std::ostream& os, const Symbol s); + +inline std::ostream& operator<<(std::ostream& os, const Production p) +{ + os << '('; + for (Production::const_iterator it = p.begin(); it != p.end(); ++it) { + os << *it << ", "; + } + os << ')'; + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const Symbol s) +{ + switch (s.kind()) { + case Symbol::sRepeater: + { + const RepeaterInfo& ri = *s.extrap<RepeaterInfo>(); + os << '(' << Symbol::toString(s.kind()) + << ' ' << *boost::tuples::get<2>(ri) + << ' ' << *boost::tuples::get<3>(ri) + << ')'; + } + break; + case Symbol::sIndirect: + { + os << '(' << Symbol::toString(s.kind()) << ' ' + << *s.extra<std::shared_ptr<Production> >() << ')'; + } + break; + case Symbol::sAlternative: + { + os << '(' << Symbol::toString(s.kind()); + for (std::vector<ProductionPtr>::const_iterator it = + s.extrap<std::vector<ProductionPtr> >()->begin(); + it != s.extrap<std::vector<ProductionPtr> >()->end(); + ++it) { + os << ' ' << **it; + } + os << ')'; + } + break; + case Symbol::sSymbolic: + { + os << '(' << Symbol::toString(s.kind()) + << ' ' << s.extra<std::weak_ptr<Production> >().lock() + << ')'; + } + break; + default: + os << Symbol::toString(s.kind()); + break; + } + return os; + } +} // namespace parsing +} // namespace avro + +#endif diff --git a/contrib/libs/apache/avro/impl/parsing/ValidatingCodec.cc b/contrib/libs/apache/avro/impl/parsing/ValidatingCodec.cc index fdf6ef898f..8a291d4317 100644 --- a/contrib/libs/apache/avro/impl/parsing/ValidatingCodec.cc +++ b/contrib/libs/apache/avro/impl/parsing/ValidatingCodec.cc @@ -1,591 +1,591 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "ValidatingCodec.hh" - -#include <string> -#include <map> -#include <algorithm> -#include <memory> -#include <boost/any.hpp> - -#include "ValidSchema.hh" -#include "Decoder.hh" -#include "Encoder.hh" -#include "NodeImpl.hh" - -namespace avro { - -using std::make_shared; - -namespace parsing { - -using std::shared_ptr; -using std::static_pointer_cast; - -using std::map; -using std::vector; -using std::pair; -using std::string; -using std::reverse; -using std::ostringstream; - -/** Follows the design of Avro Parser in Java. */ -ProductionPtr ValidatingGrammarGenerator::generate(const NodePtr& n) -{ - map<NodePtr, ProductionPtr> m; - ProductionPtr result = doGenerate(n, m); - fixup(result, m); - return result; -} - -Symbol ValidatingGrammarGenerator::generate(const ValidSchema& schema) -{ - ProductionPtr r = generate(schema.root()); - return Symbol::rootSymbol(r); -} - -ProductionPtr ValidatingGrammarGenerator::doGenerate(const NodePtr& n, - map<NodePtr, ProductionPtr> &m) { - switch (n->type()) { - case AVRO_NULL: - return make_shared<Production>(1, Symbol::nullSymbol()); - case AVRO_BOOL: - return make_shared<Production>(1, Symbol::boolSymbol()); - case AVRO_INT: - return make_shared<Production>(1, Symbol::intSymbol()); - case AVRO_LONG: - return make_shared<Production>(1, Symbol::longSymbol()); - case AVRO_FLOAT: - return make_shared<Production>(1, Symbol::floatSymbol()); - case AVRO_DOUBLE: - return make_shared<Production>(1, Symbol::doubleSymbol()); - case AVRO_STRING: - return make_shared<Production>(1, Symbol::stringSymbol()); - case AVRO_BYTES: - return make_shared<Production>(1, Symbol::bytesSymbol()); - case AVRO_FIXED: - { - ProductionPtr result = make_shared<Production>(); - result->push_back(Symbol::sizeCheckSymbol(n->fixedSize())); - result->push_back(Symbol::fixedSymbol()); - m[n] = result; - return result; - } - case AVRO_RECORD: - { - ProductionPtr result = make_shared<Production>(); - - m.erase(n); - size_t c = n->leaves(); - for (size_t i = 0; i < c; ++i) { - const NodePtr& leaf = n->leafAt(i); - ProductionPtr v = doGenerate(leaf, m); - copy(v->rbegin(), v->rend(), back_inserter(*result)); - } - reverse(result->begin(), result->end()); - - m[n] = result; - return make_shared<Production>(1, Symbol::indirect(result)); - } - case AVRO_ENUM: - { - ProductionPtr result = make_shared<Production>(); - result->push_back(Symbol::sizeCheckSymbol(n->names())); - result->push_back(Symbol::enumSymbol()); - m[n] = result; - return result; - } - case AVRO_ARRAY: - { - ProductionPtr result = make_shared<Production>(); - result->push_back(Symbol::arrayEndSymbol()); - result->push_back(Symbol::repeater(doGenerate(n->leafAt(0), m), true)); - result->push_back(Symbol::arrayStartSymbol()); - return result; - } - case AVRO_MAP: - { - ProductionPtr pp = doGenerate(n->leafAt(1), m); - ProductionPtr v(new Production(*pp)); - v->push_back(Symbol::stringSymbol()); - ProductionPtr result = make_shared<Production>(); - result->push_back(Symbol::mapEndSymbol()); - result->push_back(Symbol::repeater(v, false)); - result->push_back(Symbol::mapStartSymbol()); - return result; - } - case AVRO_UNION: - { - vector<ProductionPtr> vv; - size_t c = n->leaves(); - vv.reserve(c); - for (size_t i = 0; i < c; ++i) { - vv.push_back(doGenerate(n->leafAt(i), m)); - } - ProductionPtr result = make_shared<Production>(); - result->push_back(Symbol::alternative(vv)); - result->push_back(Symbol::unionSymbol()); - return result; - } - case AVRO_SYMBOLIC: - { - shared_ptr<NodeSymbolic> ns = static_pointer_cast<NodeSymbolic>(n); - NodePtr nn = ns->getNode(); - map<NodePtr, ProductionPtr>::iterator it = - m.find(nn); - if (it != m.end() && it->second) { - return it->second; - } else { - m[nn] = ProductionPtr(); - return make_shared<Production>(1, Symbol::placeholder(nn)); - } - } - default: - throw Exception("Unknown node type"); - } -} - -struct DummyHandler { - size_t handle(const Symbol& s) { - return 0; - } -}; - -template <typename P> -class ValidatingDecoder : public Decoder { - const shared_ptr<Decoder> base; - DummyHandler handler_; - P parser; - - void init(InputStream& is); - void decodeNull(); - bool decodeBool(); - int32_t decodeInt(); - int64_t decodeLong(); - float decodeFloat(); - double decodeDouble(); - void decodeString(string& value); - void skipString(); - void decodeBytes(vector<uint8_t>& value); - void skipBytes(); - void decodeFixed(size_t n, vector<uint8_t>& value); - void skipFixed(size_t n); - size_t decodeEnum(); - size_t arrayStart(); - size_t arrayNext(); - size_t skipArray(); - size_t mapStart(); - size_t mapNext(); - size_t skipMap(); - size_t decodeUnionIndex(); - void drain() { - base->drain(); - } - -public: - - ValidatingDecoder(const ValidSchema& s, const shared_ptr<Decoder> b) : - base(b), - parser(ValidatingGrammarGenerator().generate(s), NULL, handler_) { } - -}; - -template <typename P> -void ValidatingDecoder<P>::init(InputStream& is) -{ - base->init(is); -} - -template <typename P> -void ValidatingDecoder<P>::decodeNull() -{ - parser.advance(Symbol::sNull); - base->decodeNull(); -} - -template <typename P> -bool ValidatingDecoder<P>::decodeBool() -{ - parser.advance(Symbol::sBool); - return base->decodeBool(); -} - -template <typename P> -int32_t ValidatingDecoder<P>::decodeInt() -{ - parser.advance(Symbol::sInt); - return base->decodeInt(); -} - -template <typename P> -int64_t ValidatingDecoder<P>::decodeLong() -{ - parser.advance(Symbol::sLong); - return base->decodeLong(); -} - -template <typename P> -float ValidatingDecoder<P>::decodeFloat() -{ - parser.advance(Symbol::sFloat); - return base->decodeFloat(); -} - -template <typename P> -double ValidatingDecoder<P>::decodeDouble() -{ - parser.advance(Symbol::sDouble); - return base->decodeDouble(); -} - -template <typename P> -void ValidatingDecoder<P>::decodeString(string& value) -{ - parser.advance(Symbol::sString); - base->decodeString(value); -} - -template <typename P> -void ValidatingDecoder<P>::skipString() -{ - parser.advance(Symbol::sString); - base->skipString(); -} - -template <typename P> -void ValidatingDecoder<P>::decodeBytes(vector<uint8_t>& value) -{ - parser.advance(Symbol::sBytes); - base->decodeBytes(value); -} - -template <typename P> -void ValidatingDecoder<P>::skipBytes() -{ - parser.advance(Symbol::sBytes); - base->skipBytes(); -} - -template <typename P> -void ValidatingDecoder<P>::decodeFixed(size_t n, vector<uint8_t>& value) -{ - parser.advance(Symbol::sFixed); - parser.assertSize(n); - base->decodeFixed(n, value); -} - -template <typename P> -void ValidatingDecoder<P>::skipFixed(size_t n) -{ - parser.advance(Symbol::sFixed); - parser.assertSize(n); - base->skipFixed(n); -} - -template <typename P> -size_t ValidatingDecoder<P>::decodeEnum() -{ - parser.advance(Symbol::sEnum); - size_t result = base->decodeEnum(); - parser.assertLessThanSize(result); - return result; -} - -template <typename P> -size_t ValidatingDecoder<P>::arrayStart() -{ - parser.advance(Symbol::sArrayStart); - size_t result = base->arrayStart(); - parser.pushRepeatCount(result); - if (result == 0) { - parser.popRepeater(); - parser.advance(Symbol::sArrayEnd); - } - return result; -} - -template <typename P> -size_t ValidatingDecoder<P>::arrayNext() -{ - size_t result = base->arrayNext(); - parser.nextRepeatCount(result); - if (result == 0) { - parser.popRepeater(); - parser.advance(Symbol::sArrayEnd); - } - return result; -} - -template <typename P> -size_t ValidatingDecoder<P>::skipArray() -{ - parser.advance(Symbol::sArrayStart); - size_t n = base->skipArray(); - if (n == 0) { - parser.pop(); - } else { - parser.pushRepeatCount(n); - parser.skip(*base); - } - parser.advance(Symbol::sArrayEnd); - return 0; -} - -template <typename P> -size_t ValidatingDecoder<P>::mapStart() -{ - parser.advance(Symbol::sMapStart); - size_t result = base->mapStart(); - parser.pushRepeatCount(result); - if (result == 0) { - parser.popRepeater(); - parser.advance(Symbol::sMapEnd); - } - return result; -} - -template <typename P> -size_t ValidatingDecoder<P>::mapNext() -{ - size_t result = base->mapNext(); - parser.nextRepeatCount(result); - if (result == 0) { - parser.popRepeater(); - parser.advance(Symbol::sMapEnd); - } - return result; -} - -template <typename P> -size_t ValidatingDecoder<P>::skipMap() -{ - parser.advance(Symbol::sMapStart); - size_t n = base->skipMap(); - if (n == 0) { - parser.pop(); - } else { - parser.pushRepeatCount(n); - parser.skip(*base); - } - parser.advance(Symbol::sMapEnd); - return 0; -} - -template <typename P> -size_t ValidatingDecoder<P>::decodeUnionIndex() -{ - parser.advance(Symbol::sUnion); - size_t result = base->decodeUnionIndex(); - parser.selectBranch(result); - return result; -} - -template <typename P> -class ValidatingEncoder : public Encoder { - DummyHandler handler_; - P parser_; - EncoderPtr base_; - - void init(OutputStream& os); - void flush(); - int64_t byteCount() const; - void encodeNull(); - void encodeBool(bool b); - void encodeInt(int32_t i); - void encodeLong(int64_t l); - void encodeFloat(float f); - void encodeDouble(double d); - void encodeString(const std::string& s); - void encodeBytes(const uint8_t *bytes, size_t len); - void encodeFixed(const uint8_t *bytes, size_t len); - void encodeEnum(size_t e); - void arrayStart(); - void arrayEnd(); - void mapStart(); - void mapEnd(); - void setItemCount(size_t count); - void startItem(); - void encodeUnionIndex(size_t e); -public: - ValidatingEncoder(const ValidSchema& schema, const EncoderPtr& base) : - parser_(ValidatingGrammarGenerator().generate(schema), NULL, handler_), - base_(base) { } -}; - -template<typename P> -void ValidatingEncoder<P>::init(OutputStream& os) -{ - base_->init(os); -} - -template<typename P> -void ValidatingEncoder<P>::flush() -{ - base_->flush(); -} - -template<typename P> -void ValidatingEncoder<P>::encodeNull() -{ - parser_.advance(Symbol::sNull); - base_->encodeNull(); -} - -template<typename P> -void ValidatingEncoder<P>::encodeBool(bool b) -{ - parser_.advance(Symbol::sBool); - base_->encodeBool(b); -} - -template<typename P> -void ValidatingEncoder<P>::encodeInt(int32_t i) -{ - parser_.advance(Symbol::sInt); - base_->encodeInt(i); -} - -template<typename P> -void ValidatingEncoder<P>::encodeLong(int64_t l) -{ - parser_.advance(Symbol::sLong); - base_->encodeLong(l); -} - -template<typename P> -void ValidatingEncoder<P>::encodeFloat(float f) -{ - parser_.advance(Symbol::sFloat); - base_->encodeFloat(f); -} - -template<typename P> -void ValidatingEncoder<P>::encodeDouble(double d) -{ - parser_.advance(Symbol::sDouble); - base_->encodeDouble(d); -} - -template<typename P> -void ValidatingEncoder<P>::encodeString(const std::string& s) -{ - parser_.advance(Symbol::sString); - base_->encodeString(s); -} - -template<typename P> -void ValidatingEncoder<P>::encodeBytes(const uint8_t *bytes, size_t len) -{ - parser_.advance(Symbol::sBytes); - base_->encodeBytes(bytes, len); -} - -template<typename P> -void ValidatingEncoder<P>::encodeFixed(const uint8_t *bytes, size_t len) -{ - parser_.advance(Symbol::sFixed); - parser_.assertSize(len); - base_->encodeFixed(bytes, len); -} - -template<typename P> -void ValidatingEncoder<P>::encodeEnum(size_t e) -{ - parser_.advance(Symbol::sEnum); - parser_.assertLessThanSize(e); - base_->encodeEnum(e); -} - -template<typename P> -void ValidatingEncoder<P>::arrayStart() -{ - parser_.advance(Symbol::sArrayStart); - parser_.pushRepeatCount(0); - base_->arrayStart(); -} - -template<typename P> -void ValidatingEncoder<P>::arrayEnd() -{ - parser_.popRepeater(); - parser_.advance(Symbol::sArrayEnd); - base_->arrayEnd(); -} - -template<typename P> -void ValidatingEncoder<P>::mapStart() -{ - parser_.advance(Symbol::sMapStart); - parser_.pushRepeatCount(0); - base_->mapStart(); -} - -template<typename P> -void ValidatingEncoder<P>::mapEnd() -{ - parser_.popRepeater(); - parser_.advance(Symbol::sMapEnd); - base_->mapEnd(); -} - -template<typename P> -void ValidatingEncoder<P>::setItemCount(size_t count) -{ - parser_.nextRepeatCount(count); - base_->setItemCount(count); -} - -template<typename P> -void ValidatingEncoder<P>::startItem() -{ - if (parser_.top() != Symbol::sRepeater) { - throw Exception("startItem at not an item boundary"); - } - base_->startItem(); -} - -template<typename P> -void ValidatingEncoder<P>::encodeUnionIndex(size_t e) -{ - parser_.advance(Symbol::sUnion); - parser_.selectBranch(e); - base_->encodeUnionIndex(e); -} - -template<typename P> -int64_t ValidatingEncoder<P>::byteCount() const -{ - return base_->byteCount(); -} - -} // namespace parsing - -DecoderPtr validatingDecoder(const ValidSchema& s, - const DecoderPtr& base) -{ - return make_shared<parsing::ValidatingDecoder<parsing::SimpleParser<parsing::DummyHandler> > >(s, base); -} - -EncoderPtr validatingEncoder(const ValidSchema& schema, const EncoderPtr& base) -{ - return make_shared<parsing::ValidatingEncoder<parsing::SimpleParser<parsing::DummyHandler> > >(schema, base); -} - -} // namespace avro - +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ValidatingCodec.hh" + +#include <string> +#include <map> +#include <algorithm> +#include <memory> +#include <boost/any.hpp> + +#include "ValidSchema.hh" +#include "Decoder.hh" +#include "Encoder.hh" +#include "NodeImpl.hh" + +namespace avro { + +using std::make_shared; + +namespace parsing { + +using std::shared_ptr; +using std::static_pointer_cast; + +using std::map; +using std::vector; +using std::pair; +using std::string; +using std::reverse; +using std::ostringstream; + +/** Follows the design of Avro Parser in Java. */ +ProductionPtr ValidatingGrammarGenerator::generate(const NodePtr& n) +{ + map<NodePtr, ProductionPtr> m; + ProductionPtr result = doGenerate(n, m); + fixup(result, m); + return result; +} + +Symbol ValidatingGrammarGenerator::generate(const ValidSchema& schema) +{ + ProductionPtr r = generate(schema.root()); + return Symbol::rootSymbol(r); +} + +ProductionPtr ValidatingGrammarGenerator::doGenerate(const NodePtr& n, + map<NodePtr, ProductionPtr> &m) { + switch (n->type()) { + case AVRO_NULL: + return make_shared<Production>(1, Symbol::nullSymbol()); + case AVRO_BOOL: + return make_shared<Production>(1, Symbol::boolSymbol()); + case AVRO_INT: + return make_shared<Production>(1, Symbol::intSymbol()); + case AVRO_LONG: + return make_shared<Production>(1, Symbol::longSymbol()); + case AVRO_FLOAT: + return make_shared<Production>(1, Symbol::floatSymbol()); + case AVRO_DOUBLE: + return make_shared<Production>(1, Symbol::doubleSymbol()); + case AVRO_STRING: + return make_shared<Production>(1, Symbol::stringSymbol()); + case AVRO_BYTES: + return make_shared<Production>(1, Symbol::bytesSymbol()); + case AVRO_FIXED: + { + ProductionPtr result = make_shared<Production>(); + result->push_back(Symbol::sizeCheckSymbol(n->fixedSize())); + result->push_back(Symbol::fixedSymbol()); + m[n] = result; + return result; + } + case AVRO_RECORD: + { + ProductionPtr result = make_shared<Production>(); + + m.erase(n); + size_t c = n->leaves(); + for (size_t i = 0; i < c; ++i) { + const NodePtr& leaf = n->leafAt(i); + ProductionPtr v = doGenerate(leaf, m); + copy(v->rbegin(), v->rend(), back_inserter(*result)); + } + reverse(result->begin(), result->end()); + + m[n] = result; + return make_shared<Production>(1, Symbol::indirect(result)); + } + case AVRO_ENUM: + { + ProductionPtr result = make_shared<Production>(); + result->push_back(Symbol::sizeCheckSymbol(n->names())); + result->push_back(Symbol::enumSymbol()); + m[n] = result; + return result; + } + case AVRO_ARRAY: + { + ProductionPtr result = make_shared<Production>(); + result->push_back(Symbol::arrayEndSymbol()); + result->push_back(Symbol::repeater(doGenerate(n->leafAt(0), m), true)); + result->push_back(Symbol::arrayStartSymbol()); + return result; + } + case AVRO_MAP: + { + ProductionPtr pp = doGenerate(n->leafAt(1), m); + ProductionPtr v(new Production(*pp)); + v->push_back(Symbol::stringSymbol()); + ProductionPtr result = make_shared<Production>(); + result->push_back(Symbol::mapEndSymbol()); + result->push_back(Symbol::repeater(v, false)); + result->push_back(Symbol::mapStartSymbol()); + return result; + } + case AVRO_UNION: + { + vector<ProductionPtr> vv; + size_t c = n->leaves(); + vv.reserve(c); + for (size_t i = 0; i < c; ++i) { + vv.push_back(doGenerate(n->leafAt(i), m)); + } + ProductionPtr result = make_shared<Production>(); + result->push_back(Symbol::alternative(vv)); + result->push_back(Symbol::unionSymbol()); + return result; + } + case AVRO_SYMBOLIC: + { + shared_ptr<NodeSymbolic> ns = static_pointer_cast<NodeSymbolic>(n); + NodePtr nn = ns->getNode(); + map<NodePtr, ProductionPtr>::iterator it = + m.find(nn); + if (it != m.end() && it->second) { + return it->second; + } else { + m[nn] = ProductionPtr(); + return make_shared<Production>(1, Symbol::placeholder(nn)); + } + } + default: + throw Exception("Unknown node type"); + } +} + +struct DummyHandler { + size_t handle(const Symbol& s) { + return 0; + } +}; + +template <typename P> +class ValidatingDecoder : public Decoder { + const shared_ptr<Decoder> base; + DummyHandler handler_; + P parser; + + void init(InputStream& is); + void decodeNull(); + bool decodeBool(); + int32_t decodeInt(); + int64_t decodeLong(); + float decodeFloat(); + double decodeDouble(); + void decodeString(string& value); + void skipString(); + void decodeBytes(vector<uint8_t>& value); + void skipBytes(); + void decodeFixed(size_t n, vector<uint8_t>& value); + void skipFixed(size_t n); + size_t decodeEnum(); + size_t arrayStart(); + size_t arrayNext(); + size_t skipArray(); + size_t mapStart(); + size_t mapNext(); + size_t skipMap(); + size_t decodeUnionIndex(); + void drain() { + base->drain(); + } + +public: + + ValidatingDecoder(const ValidSchema& s, const shared_ptr<Decoder> b) : + base(b), + parser(ValidatingGrammarGenerator().generate(s), NULL, handler_) { } + +}; + +template <typename P> +void ValidatingDecoder<P>::init(InputStream& is) +{ + base->init(is); +} + +template <typename P> +void ValidatingDecoder<P>::decodeNull() +{ + parser.advance(Symbol::sNull); + base->decodeNull(); +} + +template <typename P> +bool ValidatingDecoder<P>::decodeBool() +{ + parser.advance(Symbol::sBool); + return base->decodeBool(); +} + +template <typename P> +int32_t ValidatingDecoder<P>::decodeInt() +{ + parser.advance(Symbol::sInt); + return base->decodeInt(); +} + +template <typename P> +int64_t ValidatingDecoder<P>::decodeLong() +{ + parser.advance(Symbol::sLong); + return base->decodeLong(); +} + +template <typename P> +float ValidatingDecoder<P>::decodeFloat() +{ + parser.advance(Symbol::sFloat); + return base->decodeFloat(); +} + +template <typename P> +double ValidatingDecoder<P>::decodeDouble() +{ + parser.advance(Symbol::sDouble); + return base->decodeDouble(); +} + +template <typename P> +void ValidatingDecoder<P>::decodeString(string& value) +{ + parser.advance(Symbol::sString); + base->decodeString(value); +} + +template <typename P> +void ValidatingDecoder<P>::skipString() +{ + parser.advance(Symbol::sString); + base->skipString(); +} + +template <typename P> +void ValidatingDecoder<P>::decodeBytes(vector<uint8_t>& value) +{ + parser.advance(Symbol::sBytes); + base->decodeBytes(value); +} + +template <typename P> +void ValidatingDecoder<P>::skipBytes() +{ + parser.advance(Symbol::sBytes); + base->skipBytes(); +} + +template <typename P> +void ValidatingDecoder<P>::decodeFixed(size_t n, vector<uint8_t>& value) +{ + parser.advance(Symbol::sFixed); + parser.assertSize(n); + base->decodeFixed(n, value); +} + +template <typename P> +void ValidatingDecoder<P>::skipFixed(size_t n) +{ + parser.advance(Symbol::sFixed); + parser.assertSize(n); + base->skipFixed(n); +} + +template <typename P> +size_t ValidatingDecoder<P>::decodeEnum() +{ + parser.advance(Symbol::sEnum); + size_t result = base->decodeEnum(); + parser.assertLessThanSize(result); + return result; +} + +template <typename P> +size_t ValidatingDecoder<P>::arrayStart() +{ + parser.advance(Symbol::sArrayStart); + size_t result = base->arrayStart(); + parser.pushRepeatCount(result); + if (result == 0) { + parser.popRepeater(); + parser.advance(Symbol::sArrayEnd); + } + return result; +} + +template <typename P> +size_t ValidatingDecoder<P>::arrayNext() +{ + size_t result = base->arrayNext(); + parser.nextRepeatCount(result); + if (result == 0) { + parser.popRepeater(); + parser.advance(Symbol::sArrayEnd); + } + return result; +} + +template <typename P> +size_t ValidatingDecoder<P>::skipArray() +{ + parser.advance(Symbol::sArrayStart); + size_t n = base->skipArray(); + if (n == 0) { + parser.pop(); + } else { + parser.pushRepeatCount(n); + parser.skip(*base); + } + parser.advance(Symbol::sArrayEnd); + return 0; +} + +template <typename P> +size_t ValidatingDecoder<P>::mapStart() +{ + parser.advance(Symbol::sMapStart); + size_t result = base->mapStart(); + parser.pushRepeatCount(result); + if (result == 0) { + parser.popRepeater(); + parser.advance(Symbol::sMapEnd); + } + return result; +} + +template <typename P> +size_t ValidatingDecoder<P>::mapNext() +{ + size_t result = base->mapNext(); + parser.nextRepeatCount(result); + if (result == 0) { + parser.popRepeater(); + parser.advance(Symbol::sMapEnd); + } + return result; +} + +template <typename P> +size_t ValidatingDecoder<P>::skipMap() +{ + parser.advance(Symbol::sMapStart); + size_t n = base->skipMap(); + if (n == 0) { + parser.pop(); + } else { + parser.pushRepeatCount(n); + parser.skip(*base); + } + parser.advance(Symbol::sMapEnd); + return 0; +} + +template <typename P> +size_t ValidatingDecoder<P>::decodeUnionIndex() +{ + parser.advance(Symbol::sUnion); + size_t result = base->decodeUnionIndex(); + parser.selectBranch(result); + return result; +} + +template <typename P> +class ValidatingEncoder : public Encoder { + DummyHandler handler_; + P parser_; + EncoderPtr base_; + + void init(OutputStream& os); + void flush(); + int64_t byteCount() const; + void encodeNull(); + void encodeBool(bool b); + void encodeInt(int32_t i); + void encodeLong(int64_t l); + void encodeFloat(float f); + void encodeDouble(double d); + void encodeString(const std::string& s); + void encodeBytes(const uint8_t *bytes, size_t len); + void encodeFixed(const uint8_t *bytes, size_t len); + void encodeEnum(size_t e); + void arrayStart(); + void arrayEnd(); + void mapStart(); + void mapEnd(); + void setItemCount(size_t count); + void startItem(); + void encodeUnionIndex(size_t e); +public: + ValidatingEncoder(const ValidSchema& schema, const EncoderPtr& base) : + parser_(ValidatingGrammarGenerator().generate(schema), NULL, handler_), + base_(base) { } +}; + +template<typename P> +void ValidatingEncoder<P>::init(OutputStream& os) +{ + base_->init(os); +} + +template<typename P> +void ValidatingEncoder<P>::flush() +{ + base_->flush(); +} + +template<typename P> +void ValidatingEncoder<P>::encodeNull() +{ + parser_.advance(Symbol::sNull); + base_->encodeNull(); +} + +template<typename P> +void ValidatingEncoder<P>::encodeBool(bool b) +{ + parser_.advance(Symbol::sBool); + base_->encodeBool(b); +} + +template<typename P> +void ValidatingEncoder<P>::encodeInt(int32_t i) +{ + parser_.advance(Symbol::sInt); + base_->encodeInt(i); +} + +template<typename P> +void ValidatingEncoder<P>::encodeLong(int64_t l) +{ + parser_.advance(Symbol::sLong); + base_->encodeLong(l); +} + +template<typename P> +void ValidatingEncoder<P>::encodeFloat(float f) +{ + parser_.advance(Symbol::sFloat); + base_->encodeFloat(f); +} + +template<typename P> +void ValidatingEncoder<P>::encodeDouble(double d) +{ + parser_.advance(Symbol::sDouble); + base_->encodeDouble(d); +} + +template<typename P> +void ValidatingEncoder<P>::encodeString(const std::string& s) +{ + parser_.advance(Symbol::sString); + base_->encodeString(s); +} + +template<typename P> +void ValidatingEncoder<P>::encodeBytes(const uint8_t *bytes, size_t len) +{ + parser_.advance(Symbol::sBytes); + base_->encodeBytes(bytes, len); +} + +template<typename P> +void ValidatingEncoder<P>::encodeFixed(const uint8_t *bytes, size_t len) +{ + parser_.advance(Symbol::sFixed); + parser_.assertSize(len); + base_->encodeFixed(bytes, len); +} + +template<typename P> +void ValidatingEncoder<P>::encodeEnum(size_t e) +{ + parser_.advance(Symbol::sEnum); + parser_.assertLessThanSize(e); + base_->encodeEnum(e); +} + +template<typename P> +void ValidatingEncoder<P>::arrayStart() +{ + parser_.advance(Symbol::sArrayStart); + parser_.pushRepeatCount(0); + base_->arrayStart(); +} + +template<typename P> +void ValidatingEncoder<P>::arrayEnd() +{ + parser_.popRepeater(); + parser_.advance(Symbol::sArrayEnd); + base_->arrayEnd(); +} + +template<typename P> +void ValidatingEncoder<P>::mapStart() +{ + parser_.advance(Symbol::sMapStart); + parser_.pushRepeatCount(0); + base_->mapStart(); +} + +template<typename P> +void ValidatingEncoder<P>::mapEnd() +{ + parser_.popRepeater(); + parser_.advance(Symbol::sMapEnd); + base_->mapEnd(); +} + +template<typename P> +void ValidatingEncoder<P>::setItemCount(size_t count) +{ + parser_.nextRepeatCount(count); + base_->setItemCount(count); +} + +template<typename P> +void ValidatingEncoder<P>::startItem() +{ + if (parser_.top() != Symbol::sRepeater) { + throw Exception("startItem at not an item boundary"); + } + base_->startItem(); +} + +template<typename P> +void ValidatingEncoder<P>::encodeUnionIndex(size_t e) +{ + parser_.advance(Symbol::sUnion); + parser_.selectBranch(e); + base_->encodeUnionIndex(e); +} + +template<typename P> +int64_t ValidatingEncoder<P>::byteCount() const +{ + return base_->byteCount(); +} + +} // namespace parsing + +DecoderPtr validatingDecoder(const ValidSchema& s, + const DecoderPtr& base) +{ + return make_shared<parsing::ValidatingDecoder<parsing::SimpleParser<parsing::DummyHandler> > >(s, base); +} + +EncoderPtr validatingEncoder(const ValidSchema& schema, const EncoderPtr& base) +{ + return make_shared<parsing::ValidatingEncoder<parsing::SimpleParser<parsing::DummyHandler> > >(schema, base); +} + +} // namespace avro + diff --git a/contrib/libs/apache/avro/impl/parsing/ValidatingCodec.hh b/contrib/libs/apache/avro/impl/parsing/ValidatingCodec.hh index 39ceda033e..b90b3ea64a 100644 --- a/contrib/libs/apache/avro/impl/parsing/ValidatingCodec.hh +++ b/contrib/libs/apache/avro/impl/parsing/ValidatingCodec.hh @@ -1,51 +1,51 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef avro_parsing_ValidatingCodec_hh__ -#define avro_parsing_ValidatingCodec_hh__ - -#include <map> -#include <vector> - -#include "Symbol.hh" -#include "ValidSchema.hh" -#include "NodeImpl.hh" - -namespace avro { -namespace parsing { - -class ValidatingGrammarGenerator { -protected: - template<typename T> - static void doFixup(Production& p, const std::map<T, ProductionPtr> &m); - - template<typename T> - static void doFixup(Symbol &s, const std::map<T, ProductionPtr> &m); - virtual ProductionPtr doGenerate(const NodePtr& n, - std::map<NodePtr, ProductionPtr> &m); - - ProductionPtr generate(const NodePtr& schema); -public: - Symbol generate(const ValidSchema& schema); - -}; - -} // namespace parsing -} // namespace avro - -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_parsing_ValidatingCodec_hh__ +#define avro_parsing_ValidatingCodec_hh__ + +#include <map> +#include <vector> + +#include "Symbol.hh" +#include "ValidSchema.hh" +#include "NodeImpl.hh" + +namespace avro { +namespace parsing { + +class ValidatingGrammarGenerator { +protected: + template<typename T> + static void doFixup(Production& p, const std::map<T, ProductionPtr> &m); + + template<typename T> + static void doFixup(Symbol &s, const std::map<T, ProductionPtr> &m); + virtual ProductionPtr doGenerate(const NodePtr& n, + std::map<NodePtr, ProductionPtr> &m); + + ProductionPtr generate(const NodePtr& schema); +public: + Symbol generate(const ValidSchema& schema); + +}; + +} // namespace parsing +} // namespace avro + +#endif |