diff options
author | thegeorg <thegeorg@yandex-team.ru> | 2022-02-10 16:45:08 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:08 +0300 |
commit | 4e839db24a3bbc9f1c610c43d6faaaa99824dcca (patch) | |
tree | 506dac10f5df94fab310584ee51b24fc5a081c22 /contrib/libs/apache/avro/impl/json | |
parent | 2d37894b1b037cf24231090eda8589bbb44fb6fc (diff) | |
download | ydb-4e839db24a3bbc9f1c610c43d6faaaa99824dcca.tar.gz |
Restoring authorship annotation for <thegeorg@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/apache/avro/impl/json')
-rw-r--r-- | contrib/libs/apache/avro/impl/json/JsonDom.cc | 406 | ||||
-rw-r--r-- | contrib/libs/apache/avro/impl/json/JsonDom.hh | 324 | ||||
-rw-r--r-- | contrib/libs/apache/avro/impl/json/JsonIO.cc | 884 | ||||
-rw-r--r-- | contrib/libs/apache/avro/impl/json/JsonIO.hh | 964 |
4 files changed, 1289 insertions, 1289 deletions
diff --git a/contrib/libs/apache/avro/impl/json/JsonDom.cc b/contrib/libs/apache/avro/impl/json/JsonDom.cc index ac4d8c9bfc..8a41912be2 100644 --- a/contrib/libs/apache/avro/impl/json/JsonDom.cc +++ b/contrib/libs/apache/avro/impl/json/JsonDom.cc @@ -1,203 +1,203 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "JsonDom.hh" - -#include <stdexcept> - -#include <string.h> - -#include "Stream.hh" -#include "JsonIO.hh" - -using std::string; -using boost::format; - -namespace avro { -namespace json { -const char* typeToString(EntityType t) -{ - switch (t) { - case etNull: return "null"; - case etBool: return "bool"; - case etLong: return "long"; - case etDouble: return "double"; - case etString: return "string"; - case etArray: return "array"; - case etObject: return "object"; - default: return "unknown"; - } -} - -Entity readEntity(JsonParser& p) -{ - switch (p.peek()) { - case JsonParser::tkNull: - p.advance(); - return Entity(p.line()); - case JsonParser::tkBool: - p.advance(); - return Entity(p.boolValue(), p.line()); - case JsonParser::tkLong: - p.advance(); - return Entity(p.longValue(), p.line()); - case JsonParser::tkDouble: - p.advance(); - return Entity(p.doubleValue(), p.line()); - case JsonParser::tkString: - p.advance(); - return Entity(std::make_shared<String>(p.rawString()), p.line()); - case JsonParser::tkArrayStart: - { - size_t l = p.line(); - p.advance(); - std::shared_ptr<Array> v = std::make_shared<Array>(); - while (p.peek() != JsonParser::tkArrayEnd) { - v->push_back(readEntity(p)); - } - p.advance(); - return Entity(v, l); - } - case JsonParser::tkObjectStart: - { - size_t l = p.line(); - p.advance(); - std::shared_ptr<Object> v = std::make_shared<Object>(); - while (p.peek() != JsonParser::tkObjectEnd) { - p.advance(); - std::string k = p.stringValue(); - Entity n = readEntity(p); - v->insert(std::make_pair(k, n)); - } - p.advance(); - return Entity(v, l); - } - default: - throw std::domain_error(JsonParser::toString(p.peek())); - } - -} - -Entity loadEntity(const char* text) -{ - return loadEntity(reinterpret_cast<const uint8_t*>(text), ::strlen(text)); -} - -Entity loadEntity(InputStream& in) -{ - JsonParser p; - p.init(in); - return readEntity(p); -} - -Entity loadEntity(const uint8_t* text, size_t len) -{ - std::unique_ptr<InputStream> in = memoryInputStream(text, len); - return loadEntity(*in); -} - -void writeEntity(JsonGenerator<JsonNullFormatter>& g, const Entity& n) -{ - switch (n.type()) { - case etNull: - g.encodeNull(); - break; - case etBool: - g.encodeBool(n.boolValue()); - break; - case etLong: - g.encodeNumber(n.longValue()); - break; - case etDouble: - g.encodeNumber(n.doubleValue()); - break; - case etString: - g.encodeString(n.stringValue()); - break; - case etArray: - { - g.arrayStart(); - const Array& v = n.arrayValue(); - for (Array::const_iterator it = v.begin(); - it != v.end(); ++it) { - writeEntity(g, *it); - } - g.arrayEnd(); - } - break; - case etObject: - { - g.objectStart(); - const Object& v = n.objectValue(); - for (Object::const_iterator it = v.begin(); it != v.end(); ++it) { - g.encodeString(it->first); - writeEntity(g, it->second); - } - g.objectEnd(); - } - break; - } -} - -void Entity::ensureType(EntityType type) const -{ - if (type_ != type) { - format msg = format("Invalid type. Expected \"%1%\" actual %2%") % - typeToString(type) % typeToString(type_); - throw Exception(msg); - } -} - -String Entity::stringValue() const { - ensureType(etString); - return JsonParser::toStringValue(**boost::any_cast<std::shared_ptr<String> >(&value_)); -} - -String Entity::bytesValue() const { - ensureType(etString); - return JsonParser::toBytesValue(**boost::any_cast<std::shared_ptr<String> >(&value_)); -} - -std::string Entity::toString() const -{ - std::unique_ptr<OutputStream> out = memoryOutputStream(); - JsonGenerator<JsonNullFormatter> g; - g.init(*out); - writeEntity(g, *this); - g.flush(); - std::unique_ptr<InputStream> in = memoryInputStream(*out); - const uint8_t *p = 0; - size_t n = 0; - size_t c = 0; - while (in->next(&p, &n)) { - c += n; - } - std::string result; - result.resize(c); - c = 0; - std::unique_ptr<InputStream> in2 = memoryInputStream(*out); - while (in2->next(&p, &n)) { - ::memcpy(&result[c], p, n); - c += n; - } - return result; -} - -} -} - +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "JsonDom.hh" + +#include <stdexcept> + +#include <string.h> + +#include "Stream.hh" +#include "JsonIO.hh" + +using std::string; +using boost::format; + +namespace avro { +namespace json { +const char* typeToString(EntityType t) +{ + switch (t) { + case etNull: return "null"; + case etBool: return "bool"; + case etLong: return "long"; + case etDouble: return "double"; + case etString: return "string"; + case etArray: return "array"; + case etObject: return "object"; + default: return "unknown"; + } +} + +Entity readEntity(JsonParser& p) +{ + switch (p.peek()) { + case JsonParser::tkNull: + p.advance(); + return Entity(p.line()); + case JsonParser::tkBool: + p.advance(); + return Entity(p.boolValue(), p.line()); + case JsonParser::tkLong: + p.advance(); + return Entity(p.longValue(), p.line()); + case JsonParser::tkDouble: + p.advance(); + return Entity(p.doubleValue(), p.line()); + case JsonParser::tkString: + p.advance(); + return Entity(std::make_shared<String>(p.rawString()), p.line()); + case JsonParser::tkArrayStart: + { + size_t l = p.line(); + p.advance(); + std::shared_ptr<Array> v = std::make_shared<Array>(); + while (p.peek() != JsonParser::tkArrayEnd) { + v->push_back(readEntity(p)); + } + p.advance(); + return Entity(v, l); + } + case JsonParser::tkObjectStart: + { + size_t l = p.line(); + p.advance(); + std::shared_ptr<Object> v = std::make_shared<Object>(); + while (p.peek() != JsonParser::tkObjectEnd) { + p.advance(); + std::string k = p.stringValue(); + Entity n = readEntity(p); + v->insert(std::make_pair(k, n)); + } + p.advance(); + return Entity(v, l); + } + default: + throw std::domain_error(JsonParser::toString(p.peek())); + } + +} + +Entity loadEntity(const char* text) +{ + return loadEntity(reinterpret_cast<const uint8_t*>(text), ::strlen(text)); +} + +Entity loadEntity(InputStream& in) +{ + JsonParser p; + p.init(in); + return readEntity(p); +} + +Entity loadEntity(const uint8_t* text, size_t len) +{ + std::unique_ptr<InputStream> in = memoryInputStream(text, len); + return loadEntity(*in); +} + +void writeEntity(JsonGenerator<JsonNullFormatter>& g, const Entity& n) +{ + switch (n.type()) { + case etNull: + g.encodeNull(); + break; + case etBool: + g.encodeBool(n.boolValue()); + break; + case etLong: + g.encodeNumber(n.longValue()); + break; + case etDouble: + g.encodeNumber(n.doubleValue()); + break; + case etString: + g.encodeString(n.stringValue()); + break; + case etArray: + { + g.arrayStart(); + const Array& v = n.arrayValue(); + for (Array::const_iterator it = v.begin(); + it != v.end(); ++it) { + writeEntity(g, *it); + } + g.arrayEnd(); + } + break; + case etObject: + { + g.objectStart(); + const Object& v = n.objectValue(); + for (Object::const_iterator it = v.begin(); it != v.end(); ++it) { + g.encodeString(it->first); + writeEntity(g, it->second); + } + g.objectEnd(); + } + break; + } +} + +void Entity::ensureType(EntityType type) const +{ + if (type_ != type) { + format msg = format("Invalid type. Expected \"%1%\" actual %2%") % + typeToString(type) % typeToString(type_); + throw Exception(msg); + } +} + +String Entity::stringValue() const { + ensureType(etString); + return JsonParser::toStringValue(**boost::any_cast<std::shared_ptr<String> >(&value_)); +} + +String Entity::bytesValue() const { + ensureType(etString); + return JsonParser::toBytesValue(**boost::any_cast<std::shared_ptr<String> >(&value_)); +} + +std::string Entity::toString() const +{ + std::unique_ptr<OutputStream> out = memoryOutputStream(); + JsonGenerator<JsonNullFormatter> g; + g.init(*out); + writeEntity(g, *this); + g.flush(); + std::unique_ptr<InputStream> in = memoryInputStream(*out); + const uint8_t *p = 0; + size_t n = 0; + size_t c = 0; + while (in->next(&p, &n)) { + c += n; + } + std::string result; + result.resize(c); + c = 0; + std::unique_ptr<InputStream> in2 = memoryInputStream(*out); + while (in2->next(&p, &n)) { + ::memcpy(&result[c], p, n); + c += n; + } + return result; +} + +} +} + diff --git a/contrib/libs/apache/avro/impl/json/JsonDom.hh b/contrib/libs/apache/avro/impl/json/JsonDom.hh index e1f549dfea..7eb412aa2f 100644 --- a/contrib/libs/apache/avro/impl/json/JsonDom.hh +++ b/contrib/libs/apache/avro/impl/json/JsonDom.hh @@ -1,162 +1,162 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef avro_json_JsonDom_hh__ -#define avro_json_JsonDom_hh__ - -#include <iostream> -#include <stdint.h> -#include <map> -#include <string> -#include <vector> -#include <memory> - -#include "boost/any.hpp" -#include "Config.hh" - -namespace avro { - -class AVRO_DECL InputStream; - -namespace json { -class Entity; - -typedef bool Bool; -typedef int64_t Long; -typedef double Double; -typedef std::string String; -typedef std::vector<Entity> Array; -typedef std::map<std::string, Entity> Object; - -class AVRO_DECL JsonParser; -class JsonNullFormatter; - -template<typename F = JsonNullFormatter> -class AVRO_DECL JsonGenerator; - -enum EntityType { - etNull, - etBool, - etLong, - etDouble, - etString, - etArray, - etObject -}; - -const char* typeToString(EntityType t); - -class AVRO_DECL Entity { - EntityType type_; - boost::any value_; - size_t line_; // can't be const else noncopyable... - - void ensureType(EntityType) const; -public: - Entity(size_t line = 0) : type_(etNull), line_(line) { } - Entity(Bool v, size_t line = 0) : type_(etBool), value_(v), line_(line) { } - Entity(Long v, size_t line = 0) : type_(etLong), value_(v), line_(line) { } - Entity(Double v, size_t line = 0) : type_(etDouble), value_(v), line_(line) { } - Entity(const std::shared_ptr<String>& v, size_t line = 0) : type_(etString), value_(v), line_(line) { } - Entity(const std::shared_ptr<Array>& v, size_t line = 0) : type_(etArray), value_(v), line_(line) { } - Entity(const std::shared_ptr<Object>& v, size_t line = 0) : type_(etObject), value_(v), line_(line) { } - - EntityType type() const { return type_; } - - size_t line() const { return line_; } - - Bool boolValue() const { - ensureType(etBool); - return boost::any_cast<Bool>(value_); - } - - Long longValue() const { - ensureType(etLong); - return boost::any_cast<Long>(value_); - } - - Double doubleValue() const { - ensureType(etDouble); - return boost::any_cast<Double>(value_); - } - - String stringValue() const; - - String bytesValue() const; - - const Array& arrayValue() const { - ensureType(etArray); - return **boost::any_cast<std::shared_ptr<Array> >(&value_); - } - - const Object& objectValue() const { - ensureType(etObject); - return **boost::any_cast<std::shared_ptr<Object> >(&value_); - } - - std::string toString() const; -}; - -template <typename T> -struct type_traits { -}; - -template <> struct type_traits<bool> { - static EntityType type() { return etBool; } - static const char* name() { return "bool"; } -}; - -template <> struct type_traits<int64_t> { - static EntityType type() { return etLong; } - static const char* name() { return "long"; } -}; - -template <> struct type_traits<double> { - static EntityType type() { return etDouble; } - static const char* name() { return "double"; } -}; - -template <> struct type_traits<std::string> { - static EntityType type() { return etString; } - static const char* name() { return "string"; } -}; - -template <> struct type_traits<std::vector<Entity> > { - static EntityType type() { return etArray; } - static const char* name() { return "array"; } -}; - -template <> struct type_traits<std::map<std::string, Entity> > { - static EntityType type() { return etObject; } - static const char* name() { return "object"; } -}; - -AVRO_DECL Entity readEntity(JsonParser& p); - -AVRO_DECL Entity loadEntity(InputStream& in); -AVRO_DECL Entity loadEntity(const char* text); -AVRO_DECL Entity loadEntity(const uint8_t* text, size_t len); - -void writeEntity(JsonGenerator<JsonNullFormatter>& g, const Entity& n); - -} -} - -#endif - - +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_json_JsonDom_hh__ +#define avro_json_JsonDom_hh__ + +#include <iostream> +#include <stdint.h> +#include <map> +#include <string> +#include <vector> +#include <memory> + +#include "boost/any.hpp" +#include "Config.hh" + +namespace avro { + +class AVRO_DECL InputStream; + +namespace json { +class Entity; + +typedef bool Bool; +typedef int64_t Long; +typedef double Double; +typedef std::string String; +typedef std::vector<Entity> Array; +typedef std::map<std::string, Entity> Object; + +class AVRO_DECL JsonParser; +class JsonNullFormatter; + +template<typename F = JsonNullFormatter> +class AVRO_DECL JsonGenerator; + +enum EntityType { + etNull, + etBool, + etLong, + etDouble, + etString, + etArray, + etObject +}; + +const char* typeToString(EntityType t); + +class AVRO_DECL Entity { + EntityType type_; + boost::any value_; + size_t line_; // can't be const else noncopyable... + + void ensureType(EntityType) const; +public: + Entity(size_t line = 0) : type_(etNull), line_(line) { } + Entity(Bool v, size_t line = 0) : type_(etBool), value_(v), line_(line) { } + Entity(Long v, size_t line = 0) : type_(etLong), value_(v), line_(line) { } + Entity(Double v, size_t line = 0) : type_(etDouble), value_(v), line_(line) { } + Entity(const std::shared_ptr<String>& v, size_t line = 0) : type_(etString), value_(v), line_(line) { } + Entity(const std::shared_ptr<Array>& v, size_t line = 0) : type_(etArray), value_(v), line_(line) { } + Entity(const std::shared_ptr<Object>& v, size_t line = 0) : type_(etObject), value_(v), line_(line) { } + + EntityType type() const { return type_; } + + size_t line() const { return line_; } + + Bool boolValue() const { + ensureType(etBool); + return boost::any_cast<Bool>(value_); + } + + Long longValue() const { + ensureType(etLong); + return boost::any_cast<Long>(value_); + } + + Double doubleValue() const { + ensureType(etDouble); + return boost::any_cast<Double>(value_); + } + + String stringValue() const; + + String bytesValue() const; + + const Array& arrayValue() const { + ensureType(etArray); + return **boost::any_cast<std::shared_ptr<Array> >(&value_); + } + + const Object& objectValue() const { + ensureType(etObject); + return **boost::any_cast<std::shared_ptr<Object> >(&value_); + } + + std::string toString() const; +}; + +template <typename T> +struct type_traits { +}; + +template <> struct type_traits<bool> { + static EntityType type() { return etBool; } + static const char* name() { return "bool"; } +}; + +template <> struct type_traits<int64_t> { + static EntityType type() { return etLong; } + static const char* name() { return "long"; } +}; + +template <> struct type_traits<double> { + static EntityType type() { return etDouble; } + static const char* name() { return "double"; } +}; + +template <> struct type_traits<std::string> { + static EntityType type() { return etString; } + static const char* name() { return "string"; } +}; + +template <> struct type_traits<std::vector<Entity> > { + static EntityType type() { return etArray; } + static const char* name() { return "array"; } +}; + +template <> struct type_traits<std::map<std::string, Entity> > { + static EntityType type() { return etObject; } + static const char* name() { return "object"; } +}; + +AVRO_DECL Entity readEntity(JsonParser& p); + +AVRO_DECL Entity loadEntity(InputStream& in); +AVRO_DECL Entity loadEntity(const char* text); +AVRO_DECL Entity loadEntity(const uint8_t* text, size_t len); + +void writeEntity(JsonGenerator<JsonNullFormatter>& g, const Entity& n); + +} +} + +#endif + + diff --git a/contrib/libs/apache/avro/impl/json/JsonIO.cc b/contrib/libs/apache/avro/impl/json/JsonIO.cc index c11a722ad4..d09ea2315f 100644 --- a/contrib/libs/apache/avro/impl/json/JsonIO.cc +++ b/contrib/libs/apache/avro/impl/json/JsonIO.cc @@ -1,442 +1,442 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "JsonIO.hh" - -namespace avro { -namespace json { - -using std::ostringstream; -using std::string; - -const char* const -JsonParser::tokenNames[] = { - "Null", - "Bool", - "Integer", - "Double", - "String", - "Array start", - "Array end", - "Object start", - "Object end", -}; - -char JsonParser::next() -{ - char ch = hasNext ? nextChar : ' '; - while (isspace(ch)) { - if (ch == '\n') { - line_++; - } - ch = in_.read(); - } - hasNext = false; - return ch; -} - -void JsonParser::expectToken(Token tk) -{ - if (advance() != tk) { - if (tk == tkDouble) { - if(cur() == tkString - && (sv == "Infinity" || sv == "-Infinity" || sv == "NaN")) { - curToken = tkDouble; - dv = sv == "Infinity" ? - std::numeric_limits<double>::infinity() : - sv == "-Infinity" ? - -std::numeric_limits<double>::infinity() : - std::numeric_limits<double>::quiet_NaN(); - return; - } else if (cur() == tkLong) { - dv = double(lv); - return; - } - } - ostringstream oss; - oss << "Incorrect token in the stream. Expected: " - << JsonParser::toString(tk) << ", found " - << JsonParser::toString(cur()); - throw Exception(oss.str()); - } -} - -JsonParser::Token JsonParser::doAdvance() -{ - char ch = next(); - if (ch == ']') { - if (curState == stArray0 || curState == stArrayN) { - curState = stateStack.top(); - stateStack.pop(); - return tkArrayEnd; - } else { - throw unexpected(ch); - } - } else if (ch == '}') { - if (curState == stObject0 || curState == stObjectN) { - curState = stateStack.top(); - stateStack.pop(); - return tkObjectEnd; - } else { - throw unexpected(ch); - } - } else if (ch == ',') { - if (curState != stObjectN && curState != stArrayN) { - throw unexpected(ch); - } - if (curState == stObjectN) { - curState = stObject0; - } - ch = next(); - } else if (ch == ':') { - if (curState != stKey) { - throw unexpected(ch); - } - curState = stObjectN; - ch = next(); - } - - if (curState == stObject0) { - if (ch != '"') { - throw unexpected(ch); - } - curState = stKey; - } else if (curState == stArray0) { - curState = stArrayN; - } - - switch (ch) { - case '[': - stateStack.push(curState); - curState = stArray0; - return tkArrayStart; - case '{': - stateStack.push(curState); - curState = stObject0; - return tkObjectStart; - case '"': - return tryString(); - case 't': - bv = true; - return tryLiteral("rue", 3, tkBool); - case 'f': - bv = false; - return tryLiteral("alse", 4, tkBool); - case 'n': - return tryLiteral("ull", 3, tkNull); - default: - if (isdigit(ch) || ch == '-') { - return tryNumber(ch); - } else { - throw unexpected(ch); - } - } -} - -JsonParser::Token JsonParser::tryNumber(char ch) -{ - sv.clear(); - sv.push_back(ch); - - hasNext = false; - int state = (ch == '-') ? 0 : (ch == '0') ? 1 : 2; - for (; ;) { - switch (state) { - case 0: - if (in_.hasMore()) { - ch = in_.read(); - if (isdigit(ch)) { - state = (ch == '0') ? 1 : 2; - sv.push_back(ch); - continue; - } - hasNext = true; - } - break; - case 1: - if (in_.hasMore()) { - ch = in_.read(); - if (ch == '.') { - state = 3; - sv.push_back(ch); - continue; - } else if (ch == 'e' || ch == 'E') { - sv.push_back(ch); - state = 5; - continue; - } - hasNext = true; - } - break; - case 2: - if (in_.hasMore()) { - ch = in_.read(); - if (isdigit(ch)) { - sv.push_back(ch); - continue; - } else if (ch == '.') { - state = 3; - sv.push_back(ch); - continue; - } else if (ch == 'e' || ch == 'E') { - sv.push_back(ch); - state = 5; - continue; - } - hasNext = true; - } - break; - case 3: - case 6: - if (in_.hasMore()) { - ch = in_.read(); - if (isdigit(ch)) { - sv.push_back(ch); - state++; - continue; - } - hasNext = true; - } - break; - case 4: - if (in_.hasMore()) { - ch = in_.read(); - if (isdigit(ch)) { - sv.push_back(ch); - continue; - } else if (ch == 'e' || ch == 'E') { - sv.push_back(ch); - state = 5; - continue; - } - hasNext = true; - } - break; - case 5: - if (in_.hasMore()) { - ch = in_.read(); - if (ch == '+' || ch == '-') { - sv.push_back(ch); - state = 6; - continue; - } else if (isdigit(ch)) { - sv.push_back(ch); - state = 7; - continue; - } - hasNext = true; - } - break; - case 7: - if (in_.hasMore()) { - ch = in_.read(); - if (isdigit(ch)) { - sv.push_back(ch); - continue; - } - hasNext = true; - } - break; - } - if (state == 1 || state == 2 || state == 4 || state == 7) { - if (hasNext) { - nextChar = ch; - } - std::istringstream iss(sv); - if (state == 1 || state == 2) { - iss >> lv; - return tkLong; - } else { - iss >> dv; - return tkDouble; - } - } else { - if (hasNext) { - throw unexpected(ch); - } else { - throw Exception("Unexpected EOF"); - } - } - } -} - -JsonParser::Token JsonParser::tryString() -{ - sv.clear(); - for ( ; ;) { - char ch = in_.read(); - if (ch == '"') { - return tkString; - } else if (ch == '\\') { - ch = in_.read(); - switch (ch) { - case '"': - case '\\': - case '/': - case 'b': - case 'f': - case 'n': - case 'r': - case 't': - sv.push_back('\\'); - sv.push_back(ch); - break; - case 'u': - case 'U': - { - uint32_t n = 0; - char e[4]; - in_.readBytes(reinterpret_cast<uint8_t*>(e), 4); - sv.push_back('\\'); - sv.push_back(ch); - for (int i = 0; i < 4; i++) { - n *= 16; - char c = e[i]; - if (isdigit(c) || - (c >= 'a' && c <= 'f') || - (c >= 'A' && c <= 'F')) { - sv.push_back(c); - } else { - throw unexpected(c); - } - } - } - break; - default: - throw unexpected(ch); - } - } else { - sv.push_back(ch); - } - } -} - - -string JsonParser::decodeString(const string& s, bool binary) -{ - string result; - for (string::const_iterator it = s.begin(); it != s.end(); ++it) { - char ch = *it; - if (ch == '\\') { - ch = *++it; - switch (ch) { - case '"': - case '\\': - case '/': - result.push_back(ch); - continue; - case 'b': - result.push_back('\b'); - continue; - case 'f': - result.push_back('\f'); - continue; - case 'n': - result.push_back('\n'); - continue; - case 'r': - result.push_back('\r'); - continue; - case 't': - result.push_back('\t'); - continue; - case 'u': - case 'U': - { - uint32_t n = 0; - char e[4]; - for (int i = 0; i < 4; i++) { - n *= 16; - char c = *++it; - e[i] = c; - if (isdigit(c)) { - n += c - '0'; - } else if (c >= 'a' && c <= 'f') { - n += c - 'a' + 10; - } else if (c >= 'A' && c <= 'F') { - n += c - 'A' + 10; - } - } - if (binary) { - if (n > 0xff) { - throw Exception(boost::format( - "Invalid byte for binary: %1%%2%") % ch % - string(e, 4)); - } else { - result.push_back(n); - continue; - } - } - if (n < 0x80) { - result.push_back(n); - } else if (n < 0x800) { - result.push_back((n >> 6) | 0xc0); - result.push_back((n & 0x3f) | 0x80); - } else if (n < 0x10000) { - result.push_back((n >> 12) | 0xe0); - result.push_back(((n >> 6)& 0x3f) | 0x80); - result.push_back((n & 0x3f) | 0x80); - } else if (n < 110000) { - result.push_back((n >> 18) | 0xf0); - result.push_back(((n >> 12)& 0x3f) | 0x80); - result.push_back(((n >> 6)& 0x3f) | 0x80); - result.push_back((n & 0x3f) | 0x80); - } else { - throw Exception(boost::format( - "Invalid unicode value: %1%i%2%") % ch % - string(e, 4)); - } - } - continue; - } - } else { - result.push_back(ch); - } - } - return result; -} - -Exception JsonParser::unexpected(unsigned char c) -{ - std::ostringstream oss; - oss << "Unexpected character in json " << toHex(c / 16) << toHex(c % 16); - return Exception(oss.str()); -} - -JsonParser::Token JsonParser::tryLiteral(const char exp[], size_t n, Token tk) -{ - char c[100]; - in_.readBytes(reinterpret_cast<uint8_t*>(c), n); - for (size_t i = 0; i < n; ++i) { - if (c[i] != exp[i]) { - throw unexpected(c[i]); - } - } - if (in_.hasMore()) { - nextChar = in_.read(); - if (isdigit(nextChar) || isalpha(nextChar)) { - throw unexpected(nextChar); - } - hasNext = true; - } - return tk; -} - -} -} - +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "JsonIO.hh" + +namespace avro { +namespace json { + +using std::ostringstream; +using std::string; + +const char* const +JsonParser::tokenNames[] = { + "Null", + "Bool", + "Integer", + "Double", + "String", + "Array start", + "Array end", + "Object start", + "Object end", +}; + +char JsonParser::next() +{ + char ch = hasNext ? nextChar : ' '; + while (isspace(ch)) { + if (ch == '\n') { + line_++; + } + ch = in_.read(); + } + hasNext = false; + return ch; +} + +void JsonParser::expectToken(Token tk) +{ + if (advance() != tk) { + if (tk == tkDouble) { + if(cur() == tkString + && (sv == "Infinity" || sv == "-Infinity" || sv == "NaN")) { + curToken = tkDouble; + dv = sv == "Infinity" ? + std::numeric_limits<double>::infinity() : + sv == "-Infinity" ? + -std::numeric_limits<double>::infinity() : + std::numeric_limits<double>::quiet_NaN(); + return; + } else if (cur() == tkLong) { + dv = double(lv); + return; + } + } + ostringstream oss; + oss << "Incorrect token in the stream. Expected: " + << JsonParser::toString(tk) << ", found " + << JsonParser::toString(cur()); + throw Exception(oss.str()); + } +} + +JsonParser::Token JsonParser::doAdvance() +{ + char ch = next(); + if (ch == ']') { + if (curState == stArray0 || curState == stArrayN) { + curState = stateStack.top(); + stateStack.pop(); + return tkArrayEnd; + } else { + throw unexpected(ch); + } + } else if (ch == '}') { + if (curState == stObject0 || curState == stObjectN) { + curState = stateStack.top(); + stateStack.pop(); + return tkObjectEnd; + } else { + throw unexpected(ch); + } + } else if (ch == ',') { + if (curState != stObjectN && curState != stArrayN) { + throw unexpected(ch); + } + if (curState == stObjectN) { + curState = stObject0; + } + ch = next(); + } else if (ch == ':') { + if (curState != stKey) { + throw unexpected(ch); + } + curState = stObjectN; + ch = next(); + } + + if (curState == stObject0) { + if (ch != '"') { + throw unexpected(ch); + } + curState = stKey; + } else if (curState == stArray0) { + curState = stArrayN; + } + + switch (ch) { + case '[': + stateStack.push(curState); + curState = stArray0; + return tkArrayStart; + case '{': + stateStack.push(curState); + curState = stObject0; + return tkObjectStart; + case '"': + return tryString(); + case 't': + bv = true; + return tryLiteral("rue", 3, tkBool); + case 'f': + bv = false; + return tryLiteral("alse", 4, tkBool); + case 'n': + return tryLiteral("ull", 3, tkNull); + default: + if (isdigit(ch) || ch == '-') { + return tryNumber(ch); + } else { + throw unexpected(ch); + } + } +} + +JsonParser::Token JsonParser::tryNumber(char ch) +{ + sv.clear(); + sv.push_back(ch); + + hasNext = false; + int state = (ch == '-') ? 0 : (ch == '0') ? 1 : 2; + for (; ;) { + switch (state) { + case 0: + if (in_.hasMore()) { + ch = in_.read(); + if (isdigit(ch)) { + state = (ch == '0') ? 1 : 2; + sv.push_back(ch); + continue; + } + hasNext = true; + } + break; + case 1: + if (in_.hasMore()) { + ch = in_.read(); + if (ch == '.') { + state = 3; + sv.push_back(ch); + continue; + } else if (ch == 'e' || ch == 'E') { + sv.push_back(ch); + state = 5; + continue; + } + hasNext = true; + } + break; + case 2: + if (in_.hasMore()) { + ch = in_.read(); + if (isdigit(ch)) { + sv.push_back(ch); + continue; + } else if (ch == '.') { + state = 3; + sv.push_back(ch); + continue; + } else if (ch == 'e' || ch == 'E') { + sv.push_back(ch); + state = 5; + continue; + } + hasNext = true; + } + break; + case 3: + case 6: + if (in_.hasMore()) { + ch = in_.read(); + if (isdigit(ch)) { + sv.push_back(ch); + state++; + continue; + } + hasNext = true; + } + break; + case 4: + if (in_.hasMore()) { + ch = in_.read(); + if (isdigit(ch)) { + sv.push_back(ch); + continue; + } else if (ch == 'e' || ch == 'E') { + sv.push_back(ch); + state = 5; + continue; + } + hasNext = true; + } + break; + case 5: + if (in_.hasMore()) { + ch = in_.read(); + if (ch == '+' || ch == '-') { + sv.push_back(ch); + state = 6; + continue; + } else if (isdigit(ch)) { + sv.push_back(ch); + state = 7; + continue; + } + hasNext = true; + } + break; + case 7: + if (in_.hasMore()) { + ch = in_.read(); + if (isdigit(ch)) { + sv.push_back(ch); + continue; + } + hasNext = true; + } + break; + } + if (state == 1 || state == 2 || state == 4 || state == 7) { + if (hasNext) { + nextChar = ch; + } + std::istringstream iss(sv); + if (state == 1 || state == 2) { + iss >> lv; + return tkLong; + } else { + iss >> dv; + return tkDouble; + } + } else { + if (hasNext) { + throw unexpected(ch); + } else { + throw Exception("Unexpected EOF"); + } + } + } +} + +JsonParser::Token JsonParser::tryString() +{ + sv.clear(); + for ( ; ;) { + char ch = in_.read(); + if (ch == '"') { + return tkString; + } else if (ch == '\\') { + ch = in_.read(); + switch (ch) { + case '"': + case '\\': + case '/': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + sv.push_back('\\'); + sv.push_back(ch); + break; + case 'u': + case 'U': + { + uint32_t n = 0; + char e[4]; + in_.readBytes(reinterpret_cast<uint8_t*>(e), 4); + sv.push_back('\\'); + sv.push_back(ch); + for (int i = 0; i < 4; i++) { + n *= 16; + char c = e[i]; + if (isdigit(c) || + (c >= 'a' && c <= 'f') || + (c >= 'A' && c <= 'F')) { + sv.push_back(c); + } else { + throw unexpected(c); + } + } + } + break; + default: + throw unexpected(ch); + } + } else { + sv.push_back(ch); + } + } +} + + +string JsonParser::decodeString(const string& s, bool binary) +{ + string result; + for (string::const_iterator it = s.begin(); it != s.end(); ++it) { + char ch = *it; + if (ch == '\\') { + ch = *++it; + switch (ch) { + case '"': + case '\\': + case '/': + result.push_back(ch); + continue; + case 'b': + result.push_back('\b'); + continue; + case 'f': + result.push_back('\f'); + continue; + case 'n': + result.push_back('\n'); + continue; + case 'r': + result.push_back('\r'); + continue; + case 't': + result.push_back('\t'); + continue; + case 'u': + case 'U': + { + uint32_t n = 0; + char e[4]; + for (int i = 0; i < 4; i++) { + n *= 16; + char c = *++it; + e[i] = c; + if (isdigit(c)) { + n += c - '0'; + } else if (c >= 'a' && c <= 'f') { + n += c - 'a' + 10; + } else if (c >= 'A' && c <= 'F') { + n += c - 'A' + 10; + } + } + if (binary) { + if (n > 0xff) { + throw Exception(boost::format( + "Invalid byte for binary: %1%%2%") % ch % + string(e, 4)); + } else { + result.push_back(n); + continue; + } + } + if (n < 0x80) { + result.push_back(n); + } else if (n < 0x800) { + result.push_back((n >> 6) | 0xc0); + result.push_back((n & 0x3f) | 0x80); + } else if (n < 0x10000) { + result.push_back((n >> 12) | 0xe0); + result.push_back(((n >> 6)& 0x3f) | 0x80); + result.push_back((n & 0x3f) | 0x80); + } else if (n < 110000) { + result.push_back((n >> 18) | 0xf0); + result.push_back(((n >> 12)& 0x3f) | 0x80); + result.push_back(((n >> 6)& 0x3f) | 0x80); + result.push_back((n & 0x3f) | 0x80); + } else { + throw Exception(boost::format( + "Invalid unicode value: %1%i%2%") % ch % + string(e, 4)); + } + } + continue; + } + } else { + result.push_back(ch); + } + } + return result; +} + +Exception JsonParser::unexpected(unsigned char c) +{ + std::ostringstream oss; + oss << "Unexpected character in json " << toHex(c / 16) << toHex(c % 16); + return Exception(oss.str()); +} + +JsonParser::Token JsonParser::tryLiteral(const char exp[], size_t n, Token tk) +{ + char c[100]; + in_.readBytes(reinterpret_cast<uint8_t*>(c), n); + for (size_t i = 0; i < n; ++i) { + if (c[i] != exp[i]) { + throw unexpected(c[i]); + } + } + if (in_.hasMore()) { + nextChar = in_.read(); + if (isdigit(nextChar) || isalpha(nextChar)) { + throw unexpected(nextChar); + } + hasNext = true; + } + return tk; +} + +} +} + diff --git a/contrib/libs/apache/avro/impl/json/JsonIO.hh b/contrib/libs/apache/avro/impl/json/JsonIO.hh index 5ae7ae07dc..705c5fc4fd 100644 --- a/contrib/libs/apache/avro/impl/json/JsonIO.hh +++ b/contrib/libs/apache/avro/impl/json/JsonIO.hh @@ -1,482 +1,482 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef avro_json_JsonIO_hh__ -#define avro_json_JsonIO_hh__ - -#include <locale> -#include <stack> -#include <string> -#include <sstream> -#include <boost/math/special_functions/fpclassify.hpp> -#include <boost/lexical_cast.hpp> -#include <boost/utility.hpp> - -#include "Config.hh" -#include "Stream.hh" - -namespace avro { -namespace json { - -inline char toHex(unsigned int n) { - return (n < 10) ? (n + '0') : (n + 'a' - 10); -} - - -class AVRO_DECL JsonParser : boost::noncopyable { -public: - enum Token { - tkNull, - tkBool, - tkLong, - tkDouble, - tkString, - tkArrayStart, - tkArrayEnd, - tkObjectStart, - tkObjectEnd - }; - - size_t line() const { return line_; } - -private: - enum State { - stValue, // Expect a data type - stArray0, // Expect a data type or ']' - stArrayN, // Expect a ',' or ']' - stObject0, // Expect a string or a '}' - stObjectN, // Expect a ',' or '}' - stKey // Expect a ':' - }; - std::stack<State> stateStack; - State curState; - bool hasNext; - char nextChar; - bool peeked; - - StreamReader in_; - Token curToken; - bool bv; - int64_t lv; - double dv; - std::string sv; - size_t line_; - - Token doAdvance(); - Token tryLiteral(const char exp[], size_t n, Token tk); - Token tryNumber(char ch); - Token tryString(); - Exception unexpected(unsigned char ch); - char next(); - - static std::string decodeString(const std::string& s, bool binary); - -public: - JsonParser() : curState(stValue), hasNext(false), peeked(false), line_(1) { } - - void init(InputStream& is) { - // Clear by swapping with an empty stack - std::stack<State>().swap(stateStack); - curState = stValue; - hasNext = false; - peeked = false; - line_ = 1; - in_.reset(is); - } - - Token advance() { - if (! peeked) { - curToken = doAdvance(); - } else { - peeked = false; - } - return curToken; - } - - Token peek() { - if (! peeked) { - curToken = doAdvance(); - peeked = true; - } - return curToken; - } - - void expectToken(Token tk); - - bool boolValue() const { - return bv; - } - - Token cur() const { - return curToken; - } - - double doubleValue() const { - return dv; - } - - int64_t longValue() const { - return lv; - } - - const std::string& rawString() const { - return sv; - } - - std::string stringValue() const { - return decodeString(sv, false); - } - - std::string bytesValue() const { - return decodeString(sv, true); - } - - void drain() { - if (!stateStack.empty() || peeked) { - throw Exception("Invalid state for draining"); - } - in_.drain(hasNext); - hasNext = false; - } - - /** - * Return UTF-8 encoded string value. - */ - static std::string toStringValue(const std::string& sv) { - return decodeString(sv, false); - } - - /** - * Return byte-encoded string value. It is an error if the input - * JSON string contained unicode characters more than "\u00ff'. - */ - static std::string toBytesValue(const std::string& sv) { - return decodeString(sv, true); - } - - static const char* const tokenNames[]; - - static const char* toString(Token tk) { - return tokenNames[tk]; - } -}; - -class AVRO_DECL JsonNullFormatter { -public: - JsonNullFormatter(StreamWriter&) { } - - void handleObjectStart() {} - void handleObjectEnd() {} - void handleValueEnd() {} - void handleColon() {} -}; - -class AVRO_DECL JsonPrettyFormatter { - StreamWriter& out_; - size_t level_; - std::vector<uint8_t> indent_; - - static const int CHARS_PER_LEVEL = 2; - - void printIndent() { - size_t charsToIndent = level_ * CHARS_PER_LEVEL; - if (indent_.size() < charsToIndent) { - indent_.resize(charsToIndent * 2, ' '); - } - out_.writeBytes(indent_.data(), charsToIndent); - } -public: - JsonPrettyFormatter(StreamWriter& out) : out_(out), level_(0), indent_(10, ' ') { } - - void handleObjectStart() { - out_.write('\n'); - ++level_; - printIndent(); - } - - void handleObjectEnd() { - out_.write('\n'); - --level_; - printIndent(); - } - - void handleValueEnd() { - out_.write('\n'); - printIndent(); - } - - void handleColon() { - out_.write(' '); - } -}; - -template <class F> -class AVRO_DECL JsonGenerator { - StreamWriter out_; - F formatter_; - enum State { - stStart, - stArray0, - stArrayN, - stMap0, - stMapN, - stKey, - }; - - std::stack<State> stateStack; - State top; - - void write(const char *b, const char* p) { - if (b != p) { - out_.writeBytes(reinterpret_cast<const uint8_t*>(b), p - b); - } - } - - void escape(char c, const char* b, const char *p) { - write(b, p); - out_.write('\\'); - out_.write(c); - } - - void escapeCtl(char c) { - escapeUnicode(static_cast<uint8_t>(c)); - } - - void writeHex(char c) { - out_.write(toHex((static_cast<unsigned char>(c)) / 16)); - out_.write(toHex((static_cast<unsigned char>(c)) % 16)); - } - - void escapeUnicode(uint32_t c) { - out_.write('\\'); - out_.write('u'); - writeHex((c >> 8) & 0xff); - writeHex(c & 0xff); - } - void doEncodeString(const char* b, size_t len, bool binary) { - const char* e = b + len; - out_.write('"'); - for (const char* p = b; p != e; p++) { - if ((*p & 0x80) != 0) { - write(b, p); - if (binary) { - escapeCtl(*p); - } else if ((*p & 0x40) == 0) { - throw Exception("Invalid UTF-8 sequence"); - } else { - int more = 1; - uint32_t value = 0; - if ((*p & 0x20) != 0) { - more++; - if ((*p & 0x10) != 0) { - more++; - if ((*p & 0x08) != 0) { - throw Exception("Invalid UTF-8 sequence"); - } else { - value = *p & 0x07; - } - } else { - value = *p & 0x0f; - } - } else { - value = *p & 0x1f; - } - for (int i = 0; i < more; ++i) { - if (++p == e || (*p & 0xc0) != 0x80) { - throw Exception("Invalid UTF-8 sequence"); - } - value <<= 6; - value |= *p & 0x3f; - } - escapeUnicode(value); - } - } else { - switch (*p) { - case '\\': - case '"': - case '/': - escape(*p, b, p); - break; - case '\b': - escape('b', b, p); - break; - case '\f': - escape('f', b, p); - break; - case '\n': - escape('n', b, p); - break; - case '\r': - escape('r', b, p); - break; - case '\t': - escape('t', b, p); - break; - default: - if (std::iscntrl(*p, std::locale::classic())) { - write(b, p); - escapeCtl(*p); - break; - } else { - continue; - } - } - } - b = p + 1; - } - write(b, e); - out_.write('"'); - } - - void sep() { - if (top == stArrayN) { - out_.write(','); - formatter_.handleValueEnd(); - } else if (top == stArray0) { - top = stArrayN; - } - } - - void sep2() { - if (top == stKey) { - top = stMapN; - } - } - -public: - JsonGenerator() : formatter_(out_), top(stStart) { } - - void init(OutputStream& os) { - out_.reset(os); - } - - void flush() { - out_.flush(); - } - - int64_t byteCount() const { - return out_.byteCount(); - } - - void encodeNull() { - sep(); - out_.writeBytes(reinterpret_cast<const uint8_t*>("null"), 4); - sep2(); - } - - void encodeBool(bool b) { - sep(); - if (b) { - out_.writeBytes(reinterpret_cast<const uint8_t*>("true"), 4); - } else { - out_.writeBytes(reinterpret_cast<const uint8_t*>("false"), 5); - } - sep2(); - } - - template <typename T> - void encodeNumber(T t) { - sep(); - std::ostringstream oss; - oss << boost::lexical_cast<std::string>(t); - const std::string& s = oss.str(); - out_.writeBytes(reinterpret_cast<const uint8_t*>(s.data()), s.size()); - sep2(); - } - - void encodeNumber(double t) { - sep(); - std::ostringstream oss; - if (boost::math::isfinite(t)) { - oss << boost::lexical_cast<std::string>(t); - } else if (boost::math::isnan(t)) { - oss << "NaN"; - } else if (t == std::numeric_limits<double>::infinity()) { - oss << "Infinity"; - } else { - oss << "-Infinity"; - } - const std::string& s = oss.str(); - out_.writeBytes(reinterpret_cast<const uint8_t*>(s.data()), s.size()); - sep2(); - } - - - void encodeString(const std::string& s) { - if (top == stMap0) { - top = stKey; - } else if (top == stMapN) { - out_.write(','); - formatter_.handleValueEnd(); - top = stKey; - } else if (top == stKey) { - top = stMapN; - } else { - sep(); - } - doEncodeString(s.c_str(), s.size(), false); - if (top == stKey) { - out_.write(':'); - formatter_.handleColon(); - } - } - - void encodeBinary(const uint8_t* bytes, size_t len) { - sep(); - doEncodeString(reinterpret_cast<const char *>(bytes), len, true); - sep2(); - } - - void arrayStart() { - sep(); - stateStack.push(top); - top = stArray0; - out_.write('['); - formatter_.handleObjectStart(); - } - - void arrayEnd() { - top = stateStack.top(); - stateStack.pop(); - formatter_.handleObjectEnd(); - out_.write(']'); - sep2(); - } - - void objectStart() { - sep(); - stateStack.push(top); - top = stMap0; - out_.write('{'); - formatter_.handleObjectStart(); - } - - void objectEnd() { - top = stateStack.top(); - stateStack.pop(); - formatter_.handleObjectEnd(); - out_.write('}'); - sep2(); - } - -}; - -} -} - -#endif +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef avro_json_JsonIO_hh__ +#define avro_json_JsonIO_hh__ + +#include <locale> +#include <stack> +#include <string> +#include <sstream> +#include <boost/math/special_functions/fpclassify.hpp> +#include <boost/lexical_cast.hpp> +#include <boost/utility.hpp> + +#include "Config.hh" +#include "Stream.hh" + +namespace avro { +namespace json { + +inline char toHex(unsigned int n) { + return (n < 10) ? (n + '0') : (n + 'a' - 10); +} + + +class AVRO_DECL JsonParser : boost::noncopyable { +public: + enum Token { + tkNull, + tkBool, + tkLong, + tkDouble, + tkString, + tkArrayStart, + tkArrayEnd, + tkObjectStart, + tkObjectEnd + }; + + size_t line() const { return line_; } + +private: + enum State { + stValue, // Expect a data type + stArray0, // Expect a data type or ']' + stArrayN, // Expect a ',' or ']' + stObject0, // Expect a string or a '}' + stObjectN, // Expect a ',' or '}' + stKey // Expect a ':' + }; + std::stack<State> stateStack; + State curState; + bool hasNext; + char nextChar; + bool peeked; + + StreamReader in_; + Token curToken; + bool bv; + int64_t lv; + double dv; + std::string sv; + size_t line_; + + Token doAdvance(); + Token tryLiteral(const char exp[], size_t n, Token tk); + Token tryNumber(char ch); + Token tryString(); + Exception unexpected(unsigned char ch); + char next(); + + static std::string decodeString(const std::string& s, bool binary); + +public: + JsonParser() : curState(stValue), hasNext(false), peeked(false), line_(1) { } + + void init(InputStream& is) { + // Clear by swapping with an empty stack + std::stack<State>().swap(stateStack); + curState = stValue; + hasNext = false; + peeked = false; + line_ = 1; + in_.reset(is); + } + + Token advance() { + if (! peeked) { + curToken = doAdvance(); + } else { + peeked = false; + } + return curToken; + } + + Token peek() { + if (! peeked) { + curToken = doAdvance(); + peeked = true; + } + return curToken; + } + + void expectToken(Token tk); + + bool boolValue() const { + return bv; + } + + Token cur() const { + return curToken; + } + + double doubleValue() const { + return dv; + } + + int64_t longValue() const { + return lv; + } + + const std::string& rawString() const { + return sv; + } + + std::string stringValue() const { + return decodeString(sv, false); + } + + std::string bytesValue() const { + return decodeString(sv, true); + } + + void drain() { + if (!stateStack.empty() || peeked) { + throw Exception("Invalid state for draining"); + } + in_.drain(hasNext); + hasNext = false; + } + + /** + * Return UTF-8 encoded string value. + */ + static std::string toStringValue(const std::string& sv) { + return decodeString(sv, false); + } + + /** + * Return byte-encoded string value. It is an error if the input + * JSON string contained unicode characters more than "\u00ff'. + */ + static std::string toBytesValue(const std::string& sv) { + return decodeString(sv, true); + } + + static const char* const tokenNames[]; + + static const char* toString(Token tk) { + return tokenNames[tk]; + } +}; + +class AVRO_DECL JsonNullFormatter { +public: + JsonNullFormatter(StreamWriter&) { } + + void handleObjectStart() {} + void handleObjectEnd() {} + void handleValueEnd() {} + void handleColon() {} +}; + +class AVRO_DECL JsonPrettyFormatter { + StreamWriter& out_; + size_t level_; + std::vector<uint8_t> indent_; + + static const int CHARS_PER_LEVEL = 2; + + void printIndent() { + size_t charsToIndent = level_ * CHARS_PER_LEVEL; + if (indent_.size() < charsToIndent) { + indent_.resize(charsToIndent * 2, ' '); + } + out_.writeBytes(indent_.data(), charsToIndent); + } +public: + JsonPrettyFormatter(StreamWriter& out) : out_(out), level_(0), indent_(10, ' ') { } + + void handleObjectStart() { + out_.write('\n'); + ++level_; + printIndent(); + } + + void handleObjectEnd() { + out_.write('\n'); + --level_; + printIndent(); + } + + void handleValueEnd() { + out_.write('\n'); + printIndent(); + } + + void handleColon() { + out_.write(' '); + } +}; + +template <class F> +class AVRO_DECL JsonGenerator { + StreamWriter out_; + F formatter_; + enum State { + stStart, + stArray0, + stArrayN, + stMap0, + stMapN, + stKey, + }; + + std::stack<State> stateStack; + State top; + + void write(const char *b, const char* p) { + if (b != p) { + out_.writeBytes(reinterpret_cast<const uint8_t*>(b), p - b); + } + } + + void escape(char c, const char* b, const char *p) { + write(b, p); + out_.write('\\'); + out_.write(c); + } + + void escapeCtl(char c) { + escapeUnicode(static_cast<uint8_t>(c)); + } + + void writeHex(char c) { + out_.write(toHex((static_cast<unsigned char>(c)) / 16)); + out_.write(toHex((static_cast<unsigned char>(c)) % 16)); + } + + void escapeUnicode(uint32_t c) { + out_.write('\\'); + out_.write('u'); + writeHex((c >> 8) & 0xff); + writeHex(c & 0xff); + } + void doEncodeString(const char* b, size_t len, bool binary) { + const char* e = b + len; + out_.write('"'); + for (const char* p = b; p != e; p++) { + if ((*p & 0x80) != 0) { + write(b, p); + if (binary) { + escapeCtl(*p); + } else if ((*p & 0x40) == 0) { + throw Exception("Invalid UTF-8 sequence"); + } else { + int more = 1; + uint32_t value = 0; + if ((*p & 0x20) != 0) { + more++; + if ((*p & 0x10) != 0) { + more++; + if ((*p & 0x08) != 0) { + throw Exception("Invalid UTF-8 sequence"); + } else { + value = *p & 0x07; + } + } else { + value = *p & 0x0f; + } + } else { + value = *p & 0x1f; + } + for (int i = 0; i < more; ++i) { + if (++p == e || (*p & 0xc0) != 0x80) { + throw Exception("Invalid UTF-8 sequence"); + } + value <<= 6; + value |= *p & 0x3f; + } + escapeUnicode(value); + } + } else { + switch (*p) { + case '\\': + case '"': + case '/': + escape(*p, b, p); + break; + case '\b': + escape('b', b, p); + break; + case '\f': + escape('f', b, p); + break; + case '\n': + escape('n', b, p); + break; + case '\r': + escape('r', b, p); + break; + case '\t': + escape('t', b, p); + break; + default: + if (std::iscntrl(*p, std::locale::classic())) { + write(b, p); + escapeCtl(*p); + break; + } else { + continue; + } + } + } + b = p + 1; + } + write(b, e); + out_.write('"'); + } + + void sep() { + if (top == stArrayN) { + out_.write(','); + formatter_.handleValueEnd(); + } else if (top == stArray0) { + top = stArrayN; + } + } + + void sep2() { + if (top == stKey) { + top = stMapN; + } + } + +public: + JsonGenerator() : formatter_(out_), top(stStart) { } + + void init(OutputStream& os) { + out_.reset(os); + } + + void flush() { + out_.flush(); + } + + int64_t byteCount() const { + return out_.byteCount(); + } + + void encodeNull() { + sep(); + out_.writeBytes(reinterpret_cast<const uint8_t*>("null"), 4); + sep2(); + } + + void encodeBool(bool b) { + sep(); + if (b) { + out_.writeBytes(reinterpret_cast<const uint8_t*>("true"), 4); + } else { + out_.writeBytes(reinterpret_cast<const uint8_t*>("false"), 5); + } + sep2(); + } + + template <typename T> + void encodeNumber(T t) { + sep(); + std::ostringstream oss; + oss << boost::lexical_cast<std::string>(t); + const std::string& s = oss.str(); + out_.writeBytes(reinterpret_cast<const uint8_t*>(s.data()), s.size()); + sep2(); + } + + void encodeNumber(double t) { + sep(); + std::ostringstream oss; + if (boost::math::isfinite(t)) { + oss << boost::lexical_cast<std::string>(t); + } else if (boost::math::isnan(t)) { + oss << "NaN"; + } else if (t == std::numeric_limits<double>::infinity()) { + oss << "Infinity"; + } else { + oss << "-Infinity"; + } + const std::string& s = oss.str(); + out_.writeBytes(reinterpret_cast<const uint8_t*>(s.data()), s.size()); + sep2(); + } + + + void encodeString(const std::string& s) { + if (top == stMap0) { + top = stKey; + } else if (top == stMapN) { + out_.write(','); + formatter_.handleValueEnd(); + top = stKey; + } else if (top == stKey) { + top = stMapN; + } else { + sep(); + } + doEncodeString(s.c_str(), s.size(), false); + if (top == stKey) { + out_.write(':'); + formatter_.handleColon(); + } + } + + void encodeBinary(const uint8_t* bytes, size_t len) { + sep(); + doEncodeString(reinterpret_cast<const char *>(bytes), len, true); + sep2(); + } + + void arrayStart() { + sep(); + stateStack.push(top); + top = stArray0; + out_.write('['); + formatter_.handleObjectStart(); + } + + void arrayEnd() { + top = stateStack.top(); + stateStack.pop(); + formatter_.handleObjectEnd(); + out_.write(']'); + sep2(); + } + + void objectStart() { + sep(); + stateStack.push(top); + top = stMap0; + out_.write('{'); + formatter_.handleObjectStart(); + } + + void objectEnd() { + top = stateStack.top(); + stateStack.pop(); + formatter_.handleObjectEnd(); + out_.write('}'); + sep2(); + } + +}; + +} +} + +#endif |