// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include #include #include #include #include #include #include "contrib/libs/apache/arrow_next/cpp/src/arrow/array/array_dict.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/array/builder_binary.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/array/builder_decimal.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/array/builder_dict.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/array/builder_nested.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/array/builder_primitive.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/array/builder_time.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/array/builder_union.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/chunked_array.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/ipc/json_simple.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/scalar.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/type_traits.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/checked_cast.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/decimal.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/float16.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/logging.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/value_parsing.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/json/rapidjson_defs.h" #include #include #include #include #include namespace rj = arrow20::rapidjson; namespace arrow20 { using internal::ParseValue; using util::Float16; namespace ipc { namespace internal { namespace json { using ::arrow20::internal::checked_cast; using ::arrow20::internal::checked_pointer_cast; namespace { constexpr auto kParseFlags = rj::kParseFullPrecisionFlag | rj::kParseNanAndInfFlag; const char* JsonTypeName(rj::Type json_type) { switch (json_type) { case rapidjson::kNullType: return "null"; case rapidjson::kFalseType: return "false"; case rapidjson::kTrueType: return "true"; case rapidjson::kObjectType: return "object"; case rapidjson::kArrayType: return "array"; case rapidjson::kStringType: return "string"; case rapidjson::kNumberType: return "number"; default: return "unknown"; } } Status JSONTypeError(const char* expected_type, rj::Type json_type) { return Status::Invalid("Expected ", expected_type, " or null, got JSON type ", JsonTypeName(json_type)); } class Converter { public: virtual ~Converter() = default; virtual Status Init() { return Status::OK(); } virtual Status AppendValue(const rj::Value& json_obj) = 0; Status AppendNull() { return this->builder()->AppendNull(); } virtual Status AppendValues(const rj::Value& json_array) = 0; virtual std::shared_ptr builder() = 0; virtual Status Finish(std::shared_ptr* out) { auto builder = this->builder(); if (builder->length() == 0) { // Make sure the builder was initialized RETURN_NOT_OK(builder->Resize(1)); } return builder->Finish(out); } protected: std::shared_ptr type_; }; Status GetConverter(const std::shared_ptr&, std::shared_ptr* out); // CRTP template class ConcreteConverter : public Converter { public: Result SizeOfJSONArray(const rj::Value& json_obj) { if (!json_obj.IsArray()) { return JSONTypeError("array", json_obj.GetType()); } return json_obj.Size(); } Status AppendValues(const rj::Value& json_array) final { auto self = static_cast(this); ARROW_ASSIGN_OR_RAISE(auto size, SizeOfJSONArray(json_array)); for (uint32_t i = 0; i < size; ++i) { RETURN_NOT_OK(self->AppendValue(json_array[i])); } return Status::OK(); } const std::shared_ptr& value_type() { if (type_->id() != Type::DICTIONARY) { return type_; } return checked_cast(*type_).value_type(); } template Status MakeConcreteBuilder(std::shared_ptr* out) { std::unique_ptr builder; RETURN_NOT_OK(MakeBuilder(default_memory_pool(), this->type_, &builder)); *out = checked_pointer_cast(std::move(builder)); DCHECK(*out); return Status::OK(); } }; // ------------------------------------------------------------------------ // Converter for null arrays class NullConverter final : public ConcreteConverter { public: explicit NullConverter(const std::shared_ptr& type) { type_ = type; builder_ = std::make_shared(); } Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return AppendNull(); } return JSONTypeError("null", json_obj.GetType()); } std::shared_ptr builder() override { return builder_; } private: std::shared_ptr builder_; }; // ------------------------------------------------------------------------ // Converter for boolean arrays class BooleanConverter final : public ConcreteConverter { public: explicit BooleanConverter(const std::shared_ptr& type) { type_ = type; builder_ = std::make_shared(); } Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return AppendNull(); } if (json_obj.IsBool()) { return builder_->Append(json_obj.GetBool()); } if (json_obj.IsInt()) { return builder_->Append(json_obj.GetInt() != 0); } return JSONTypeError("boolean", json_obj.GetType()); } std::shared_ptr builder() override { return builder_; } private: std::shared_ptr builder_; }; // ------------------------------------------------------------------------ // Helpers for numeric converters // Convert single signed integer value (also {Date,Time}{32,64} and Timestamp) template enable_if_physical_signed_integer ConvertNumber(const rj::Value& json_obj, const DataType& type, typename T::c_type* out) { if (json_obj.IsInt64()) { int64_t v64 = json_obj.GetInt64(); *out = static_cast(v64); if (*out == v64) { return Status::OK(); } else { return Status::Invalid("Value ", v64, " out of bounds for ", type); } } else { *out = static_cast(0); return JSONTypeError("signed int", json_obj.GetType()); } } // Convert single unsigned integer value template enable_if_unsigned_integer ConvertNumber(const rj::Value& json_obj, const DataType& type, typename T::c_type* out) { if (json_obj.IsUint64()) { uint64_t v64 = json_obj.GetUint64(); *out = static_cast(v64); if (*out == v64) { return Status::OK(); } else { return Status::Invalid("Value ", v64, " out of bounds for ", type); } } else { *out = static_cast(0); return JSONTypeError("unsigned int", json_obj.GetType()); } } // Convert float16/HalfFloatType template enable_if_half_float ConvertNumber(const rj::Value& json_obj, const DataType& type, uint16_t* out) { if (json_obj.IsDouble()) { double f64 = json_obj.GetDouble(); *out = Float16(f64).bits(); return Status::OK(); } else if (json_obj.IsUint()) { uint32_t u32t = json_obj.GetUint(); double f64 = static_cast(u32t); *out = Float16(f64).bits(); return Status::OK(); } else if (json_obj.IsInt()) { int32_t i32t = json_obj.GetInt(); double f64 = static_cast(i32t); *out = Float16(f64).bits(); return Status::OK(); } else { *out = static_cast(0); return JSONTypeError("unsigned int", json_obj.GetType()); } } // Convert single floating point value template enable_if_physical_floating_point ConvertNumber(const rj::Value& json_obj, const DataType& type, typename T::c_type* out) { if (json_obj.IsNumber()) { *out = static_cast(json_obj.GetDouble()); return Status::OK(); } else { *out = static_cast(0); return JSONTypeError("number", json_obj.GetType()); } } // ------------------------------------------------------------------------ // Converter for int arrays template ::BuilderType> class IntegerConverter final : public ConcreteConverter> { using c_type = typename Type::c_type; static constexpr auto is_signed = std::is_signed::value; public: explicit IntegerConverter(const std::shared_ptr& type) { this->type_ = type; } Status Init() override { return this->MakeConcreteBuilder(&builder_); } Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return this->AppendNull(); } c_type value; RETURN_NOT_OK(ConvertNumber(json_obj, *this->type_, &value)); return builder_->Append(value); } std::shared_ptr builder() override { return builder_; } private: std::shared_ptr builder_; }; // ------------------------------------------------------------------------ // Converter for float arrays template ::BuilderType> class FloatConverter final : public ConcreteConverter> { using c_type = typename Type::c_type; public: explicit FloatConverter(const std::shared_ptr& type) { this->type_ = type; } Status Init() override { return this->MakeConcreteBuilder(&builder_); } Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return this->AppendNull(); } c_type value; RETURN_NOT_OK(ConvertNumber(json_obj, *this->type_, &value)); return builder_->Append(value); } std::shared_ptr builder() override { return builder_; } private: std::shared_ptr builder_; }; // ------------------------------------------------------------------------ // Converter for decimal arrays template class DecimalConverter final : public ConcreteConverter< DecimalConverter> { public: explicit DecimalConverter(const std::shared_ptr& type) { this->type_ = type; decimal_type_ = &checked_cast(*this->value_type()); } Status Init() override { return this->MakeConcreteBuilder(&builder_); } Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return this->AppendNull(); } if (json_obj.IsString()) { int32_t precision, scale; DecimalValue d; auto view = std::string_view(json_obj.GetString(), json_obj.GetStringLength()); RETURN_NOT_OK(DecimalValue::FromString(view, &d, &precision, &scale)); if (scale != decimal_type_->scale()) { return Status::Invalid("Invalid scale for decimal: expected ", decimal_type_->scale(), ", got ", scale); } return builder_->Append(d); } return JSONTypeError("decimal string", json_obj.GetType()); } std::shared_ptr builder() override { return builder_; } private: std::shared_ptr builder_; const DecimalSubtype* decimal_type_; }; template ::BuilderType> using Decimal32Converter = DecimalConverter; template ::BuilderType> using Decimal64Converter = DecimalConverter; template ::BuilderType> using Decimal128Converter = DecimalConverter; template ::BuilderType> using Decimal256Converter = DecimalConverter; // ------------------------------------------------------------------------ // Converter for timestamp arrays class TimestampConverter final : public ConcreteConverter { public: explicit TimestampConverter(const std::shared_ptr& type) : timestamp_type_{checked_cast(type.get())} { this->type_ = type; builder_ = std::make_shared(type, default_memory_pool()); } Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return this->AppendNull(); } int64_t value; if (json_obj.IsNumber()) { RETURN_NOT_OK(ConvertNumber(json_obj, *this->type_, &value)); } else if (json_obj.IsString()) { std::string_view view(json_obj.GetString(), json_obj.GetStringLength()); if (!ParseValue(*timestamp_type_, view.data(), view.size(), &value)) { return Status::Invalid("couldn't parse timestamp from ", view); } } else { return JSONTypeError("timestamp", json_obj.GetType()); } return builder_->Append(value); } std::shared_ptr builder() override { return builder_; } private: const TimestampType* timestamp_type_; std::shared_ptr builder_; }; // ------------------------------------------------------------------------ // Converter for day-time interval arrays class DayTimeIntervalConverter final : public ConcreteConverter { public: explicit DayTimeIntervalConverter(const std::shared_ptr& type) { this->type_ = type; builder_ = std::make_shared(default_memory_pool()); } Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return this->AppendNull(); } DayTimeIntervalType::DayMilliseconds value; if (!json_obj.IsArray()) { return JSONTypeError("array", json_obj.GetType()); } if (json_obj.Size() != 2) { return Status::Invalid( "day time interval pair must have exactly two elements, had ", json_obj.Size()); } RETURN_NOT_OK(ConvertNumber(json_obj[0], *this->type_, &value.days)); RETURN_NOT_OK( ConvertNumber(json_obj[1], *this->type_, &value.milliseconds)); return builder_->Append(value); } std::shared_ptr builder() override { return builder_; } private: std::shared_ptr builder_; }; class MonthDayNanoIntervalConverter final : public ConcreteConverter { public: explicit MonthDayNanoIntervalConverter(const std::shared_ptr& type) { this->type_ = type; builder_ = std::make_shared(default_memory_pool()); } Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return this->AppendNull(); } MonthDayNanoIntervalType::MonthDayNanos value; if (!json_obj.IsArray()) { return JSONTypeError("array", json_obj.GetType()); } if (json_obj.Size() != 3) { return Status::Invalid( "month_day_nano_interval must have exactly 3 elements, had ", json_obj.Size()); } RETURN_NOT_OK(ConvertNumber(json_obj[0], *this->type_, &value.months)); RETURN_NOT_OK(ConvertNumber(json_obj[1], *this->type_, &value.days)); RETURN_NOT_OK( ConvertNumber(json_obj[2], *this->type_, &value.nanoseconds)); return builder_->Append(value); } std::shared_ptr builder() override { return builder_; } private: std::shared_ptr builder_; }; // ------------------------------------------------------------------------ // Converter for binary and string arrays template ::BuilderType> class StringConverter final : public ConcreteConverter> { public: explicit StringConverter(const std::shared_ptr& type) { this->type_ = type; } Status Init() override { return this->MakeConcreteBuilder(&builder_); } Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return this->AppendNull(); } if (json_obj.IsString()) { auto view = std::string_view(json_obj.GetString(), json_obj.GetStringLength()); return builder_->Append(view); } else { return JSONTypeError("string", json_obj.GetType()); } } std::shared_ptr builder() override { return builder_; } private: std::shared_ptr builder_; }; // ------------------------------------------------------------------------ // Converter for fixed-size binary arrays template ::BuilderType> class FixedSizeBinaryConverter final : public ConcreteConverter> { public: explicit FixedSizeBinaryConverter(const std::shared_ptr& type) { this->type_ = type; } Status Init() override { return this->MakeConcreteBuilder(&builder_); } Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return this->AppendNull(); } if (json_obj.IsString()) { auto view = std::string_view(json_obj.GetString(), json_obj.GetStringLength()); if (view.length() != static_cast(builder_->byte_width())) { std::stringstream ss; ss << "Invalid string length " << view.length() << " in JSON input for " << this->type_->ToString(); return Status::Invalid(ss.str()); } return builder_->Append(view); } else { return JSONTypeError("string", json_obj.GetType()); } } std::shared_ptr builder() override { return builder_; } private: std::shared_ptr builder_; }; // ------------------------------------------------------------------------ // Converter for list arrays template class VarLengthListLikeConverter final : public ConcreteConverter> { public: using BuilderType = typename TypeTraits::BuilderType; explicit VarLengthListLikeConverter(const std::shared_ptr& type) { this->type_ = type; } Status Init() override { const auto& var_length_list_like_type = checked_cast(*this->type_); RETURN_NOT_OK( GetConverter(var_length_list_like_type.value_type(), &child_converter_)); auto child_builder = child_converter_->builder(); builder_ = std::make_shared(default_memory_pool(), child_builder, this->type_); return Status::OK(); } Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return this->AppendNull(); } // Extend the child converter with this JSON array ARROW_ASSIGN_OR_RAISE(auto size, this->SizeOfJSONArray(json_obj)); RETURN_NOT_OK(builder_->Append(true, size)); return child_converter_->AppendValues(json_obj); } std::shared_ptr builder() override { return builder_; } private: std::shared_ptr builder_; std::shared_ptr child_converter_; }; // ------------------------------------------------------------------------ // Converter for map arrays class MapConverter final : public ConcreteConverter { public: explicit MapConverter(const std::shared_ptr& type) { type_ = type; } Status Init() override { const auto& map_type = checked_cast(*type_); RETURN_NOT_OK(GetConverter(map_type.key_type(), &key_converter_)); RETURN_NOT_OK(GetConverter(map_type.item_type(), &item_converter_)); auto key_builder = key_converter_->builder(); auto item_builder = item_converter_->builder(); builder_ = std::make_shared(default_memory_pool(), key_builder, item_builder, type_); return Status::OK(); } Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return this->AppendNull(); } RETURN_NOT_OK(builder_->Append()); if (!json_obj.IsArray()) { return JSONTypeError("array", json_obj.GetType()); } auto size = json_obj.Size(); for (uint32_t i = 0; i < size; ++i) { const auto& json_pair = json_obj[i]; if (!json_pair.IsArray()) { return JSONTypeError("array", json_pair.GetType()); } if (json_pair.Size() != 2) { return Status::Invalid("key item pair must have exactly two elements, had ", json_pair.Size()); } if (json_pair[0].IsNull()) { return Status::Invalid("null key is invalid"); } RETURN_NOT_OK(key_converter_->AppendValue(json_pair[0])); RETURN_NOT_OK(item_converter_->AppendValue(json_pair[1])); } return Status::OK(); } std::shared_ptr builder() override { return builder_; } private: std::shared_ptr builder_; std::shared_ptr key_converter_, item_converter_; }; // ------------------------------------------------------------------------ // Converter for fixed size list arrays class FixedSizeListConverter final : public ConcreteConverter { public: explicit FixedSizeListConverter(const std::shared_ptr& type) { type_ = type; } Status Init() override { const auto& list_type = checked_cast(*type_); list_size_ = list_type.list_size(); RETURN_NOT_OK(GetConverter(list_type.value_type(), &child_converter_)); auto child_builder = child_converter_->builder(); builder_ = std::make_shared(default_memory_pool(), child_builder, type_); return Status::OK(); } Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return this->AppendNull(); } RETURN_NOT_OK(builder_->Append()); // Extend the child converter with this JSON array RETURN_NOT_OK(child_converter_->AppendValues(json_obj)); if (json_obj.GetArray().Size() != static_cast(list_size_)) { return Status::Invalid("incorrect list size ", json_obj.GetArray().Size()); } return Status::OK(); } std::shared_ptr builder() override { return builder_; } private: int32_t list_size_; std::shared_ptr builder_; std::shared_ptr child_converter_; }; // ------------------------------------------------------------------------ // Converter for struct arrays class StructConverter final : public ConcreteConverter { public: explicit StructConverter(const std::shared_ptr& type) { type_ = type; } Status Init() override { std::vector> child_builders; for (const auto& field : type_->fields()) { std::shared_ptr child_converter; RETURN_NOT_OK(GetConverter(field->type(), &child_converter)); child_converters_.push_back(child_converter); child_builders.push_back(child_converter->builder()); } builder_ = std::make_shared(type_, default_memory_pool(), std::move(child_builders)); return Status::OK(); } // Append a JSON value that is either an array of N elements in order // or an object mapping struct names to values (omitted struct members // are mapped to null). Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return this->AppendNull(); } if (json_obj.IsArray()) { auto size = json_obj.Size(); auto expected_size = static_cast(type_->num_fields()); if (size != expected_size) { return Status::Invalid("Expected array of size ", expected_size, ", got array of size ", size); } for (uint32_t i = 0; i < size; ++i) { RETURN_NOT_OK(child_converters_[i]->AppendValue(json_obj[i])); } return builder_->Append(); } if (json_obj.IsObject()) { auto remaining = json_obj.MemberCount(); auto num_children = type_->num_fields(); for (int32_t i = 0; i < num_children; ++i) { const auto& field = type_->field(i); auto it = json_obj.FindMember(field->name()); if (it != json_obj.MemberEnd()) { --remaining; RETURN_NOT_OK(child_converters_[i]->AppendValue(it->value)); } else { RETURN_NOT_OK(child_converters_[i]->AppendNull()); } } if (remaining > 0) { rj::StringBuffer sb; rj::Writer writer(sb); json_obj.Accept(writer); return Status::Invalid("Unexpected members in JSON object for type ", type_->ToString(), " Object: ", sb.GetString()); } return builder_->Append(); } return JSONTypeError("array or object", json_obj.GetType()); } std::shared_ptr builder() override { return builder_; } private: std::shared_ptr builder_; std::vector> child_converters_; }; // ------------------------------------------------------------------------ // Converter for union arrays class UnionConverter final : public ConcreteConverter { public: explicit UnionConverter(const std::shared_ptr& type) { type_ = type; } Status Init() override { auto union_type = checked_cast(type_.get()); mode_ = union_type->mode(); type_id_to_child_num_.clear(); type_id_to_child_num_.resize(union_type->max_type_code() + 1, -1); int child_i = 0; for (auto type_id : union_type->type_codes()) { type_id_to_child_num_[type_id] = child_i++; } std::vector> child_builders; for (const auto& field : type_->fields()) { std::shared_ptr child_converter; RETURN_NOT_OK(GetConverter(field->type(), &child_converter)); child_converters_.push_back(child_converter); child_builders.push_back(child_converter->builder()); } if (mode_ == UnionMode::DENSE) { builder_ = std::make_shared(default_memory_pool(), std::move(child_builders), type_); } else { builder_ = std::make_shared(default_memory_pool(), std::move(child_builders), type_); } return Status::OK(); } // Append a JSON value that must be a 2-long array, containing the type_id // and value of the UnionArray's slot. Status AppendValue(const rj::Value& json_obj) override { if (json_obj.IsNull()) { return this->AppendNull(); } if (!json_obj.IsArray()) { return JSONTypeError("array", json_obj.GetType()); } if (json_obj.Size() != 2) { return Status::Invalid("Expected [type_id, value] pair, got array of size ", json_obj.Size()); } const auto& id_obj = json_obj[0]; if (!id_obj.IsInt()) { return JSONTypeError("int", id_obj.GetType()); } auto id = static_cast(id_obj.GetInt()); auto child_num = type_id_to_child_num_[id]; if (child_num == -1) { return Status::Invalid("type_id ", id, " not found in ", *type_); } auto child_converter = child_converters_[child_num]; if (mode_ == UnionMode::SPARSE) { RETURN_NOT_OK(checked_cast(*builder_).Append(id)); for (auto&& other_converter : child_converters_) { if (other_converter != child_converter) { RETURN_NOT_OK(other_converter->AppendNull()); } } } else { RETURN_NOT_OK(checked_cast(*builder_).Append(id)); } return child_converter->AppendValue(json_obj[1]); } std::shared_ptr builder() override { return builder_; } private: UnionMode::type mode_; std::shared_ptr builder_; std::vector> child_converters_; std::vector type_id_to_child_num_; }; // ------------------------------------------------------------------------ // General conversion functions Status ConversionNotImplemented(const std::shared_ptr& type) { return Status::NotImplemented("JSON conversion to ", type->ToString(), " not implemented"); } Status GetDictConverter(const std::shared_ptr& type, std::shared_ptr* out) { std::shared_ptr res; const auto value_type = checked_cast(*type).value_type(); #define SIMPLE_CONVERTER_CASE(ID, CLASS, TYPE) \ case ID: \ res = std::make_shared>>(type); \ break; #define PARAM_CONVERTER_CASE(ID, CLASS, TYPE) \ case ID: \ res = std::make_shared>>(type); \ break; switch (value_type->id()) { PARAM_CONVERTER_CASE(Type::INT8, IntegerConverter, Int8Type) PARAM_CONVERTER_CASE(Type::INT16, IntegerConverter, Int16Type) PARAM_CONVERTER_CASE(Type::INT32, IntegerConverter, Int32Type) PARAM_CONVERTER_CASE(Type::INT64, IntegerConverter, Int64Type) PARAM_CONVERTER_CASE(Type::UINT8, IntegerConverter, UInt8Type) PARAM_CONVERTER_CASE(Type::UINT16, IntegerConverter, UInt16Type) PARAM_CONVERTER_CASE(Type::UINT32, IntegerConverter, UInt32Type) PARAM_CONVERTER_CASE(Type::UINT64, IntegerConverter, UInt64Type) PARAM_CONVERTER_CASE(Type::FLOAT, FloatConverter, FloatType) PARAM_CONVERTER_CASE(Type::DOUBLE, FloatConverter, DoubleType) PARAM_CONVERTER_CASE(Type::STRING, StringConverter, StringType) PARAM_CONVERTER_CASE(Type::BINARY, StringConverter, BinaryType) PARAM_CONVERTER_CASE(Type::LARGE_STRING, StringConverter, LargeStringType) PARAM_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter, LargeBinaryType) PARAM_CONVERTER_CASE(Type::STRING_VIEW, StringConverter, StringViewType) PARAM_CONVERTER_CASE(Type::BINARY_VIEW, StringConverter, BinaryViewType) SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter, FixedSizeBinaryType) SIMPLE_CONVERTER_CASE(Type::DECIMAL32, Decimal32Converter, Decimal32Type) SIMPLE_CONVERTER_CASE(Type::DECIMAL64, Decimal64Converter, Decimal64Type) SIMPLE_CONVERTER_CASE(Type::DECIMAL128, Decimal128Converter, Decimal128Type) SIMPLE_CONVERTER_CASE(Type::DECIMAL256, Decimal256Converter, Decimal256Type) default: return ConversionNotImplemented(type); } #undef SIMPLE_CONVERTER_CASE #undef PARAM_CONVERTER_CASE RETURN_NOT_OK(res->Init()); *out = res; return Status::OK(); } Status GetConverter(const std::shared_ptr& type, std::shared_ptr* out) { if (type->id() == Type::DICTIONARY) { return GetDictConverter(type, out); } std::shared_ptr res; #define SIMPLE_CONVERTER_CASE(ID, CLASS) \ case ID: \ res = std::make_shared(type); \ break; switch (type->id()) { SIMPLE_CONVERTER_CASE(Type::INT8, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::INT16, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::INT32, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::INT64, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::UINT8, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::UINT16, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::UINT32, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::UINT64, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::TIMESTAMP, TimestampConverter) SIMPLE_CONVERTER_CASE(Type::DATE32, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::DATE64, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::TIME32, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::TIME64, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::DURATION, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::NA, NullConverter) SIMPLE_CONVERTER_CASE(Type::BOOL, BooleanConverter) SIMPLE_CONVERTER_CASE(Type::HALF_FLOAT, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::FLOAT, FloatConverter) SIMPLE_CONVERTER_CASE(Type::DOUBLE, FloatConverter) SIMPLE_CONVERTER_CASE(Type::LIST, VarLengthListLikeConverter) SIMPLE_CONVERTER_CASE(Type::LARGE_LIST, VarLengthListLikeConverter) SIMPLE_CONVERTER_CASE(Type::LIST_VIEW, VarLengthListLikeConverter) SIMPLE_CONVERTER_CASE(Type::LARGE_LIST_VIEW, VarLengthListLikeConverter) SIMPLE_CONVERTER_CASE(Type::MAP, MapConverter) SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_LIST, FixedSizeListConverter) SIMPLE_CONVERTER_CASE(Type::STRUCT, StructConverter) SIMPLE_CONVERTER_CASE(Type::STRING, StringConverter) SIMPLE_CONVERTER_CASE(Type::BINARY, StringConverter) SIMPLE_CONVERTER_CASE(Type::LARGE_STRING, StringConverter) SIMPLE_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter) SIMPLE_CONVERTER_CASE(Type::STRING_VIEW, StringConverter) SIMPLE_CONVERTER_CASE(Type::BINARY_VIEW, StringConverter) SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter<>) SIMPLE_CONVERTER_CASE(Type::DECIMAL32, Decimal32Converter<>) SIMPLE_CONVERTER_CASE(Type::DECIMAL64, Decimal64Converter<>) SIMPLE_CONVERTER_CASE(Type::DECIMAL128, Decimal128Converter<>) SIMPLE_CONVERTER_CASE(Type::DECIMAL256, Decimal256Converter<>) SIMPLE_CONVERTER_CASE(Type::SPARSE_UNION, UnionConverter) SIMPLE_CONVERTER_CASE(Type::DENSE_UNION, UnionConverter) SIMPLE_CONVERTER_CASE(Type::INTERVAL_MONTHS, IntegerConverter) SIMPLE_CONVERTER_CASE(Type::INTERVAL_DAY_TIME, DayTimeIntervalConverter) SIMPLE_CONVERTER_CASE(Type::INTERVAL_MONTH_DAY_NANO, MonthDayNanoIntervalConverter) default: return ConversionNotImplemented(type); } #undef SIMPLE_CONVERTER_CASE RETURN_NOT_OK(res->Init()); *out = res; return Status::OK(); } } // namespace Result> ArrayFromJSON(const std::shared_ptr& type, std::string_view json_string) { std::shared_ptr converter; RETURN_NOT_OK(GetConverter(type, &converter)); rj::Document json_doc; json_doc.Parse(json_string.data(), json_string.length()); if (json_doc.HasParseError()) { return Status::Invalid("JSON parse error at offset ", json_doc.GetErrorOffset(), ": ", GetParseError_En(json_doc.GetParseError())); } // The JSON document should be an array, append it RETURN_NOT_OK(converter->AppendValues(json_doc)); std::shared_ptr out; RETURN_NOT_OK(converter->Finish(&out)); return out; } Result> ArrayFromJSON(const std::shared_ptr& type, const std::string& json_string) { return ArrayFromJSON(type, std::string_view(json_string)); } Result> ArrayFromJSON(const std::shared_ptr& type, const char* json_string) { return ArrayFromJSON(type, std::string_view(json_string)); } Status ChunkedArrayFromJSON(const std::shared_ptr& type, const std::vector& json_strings, std::shared_ptr* out) { ArrayVector out_chunks; out_chunks.reserve(json_strings.size()); for (const std::string& chunk_json : json_strings) { out_chunks.emplace_back(); ARROW_ASSIGN_OR_RAISE(out_chunks.back(), ArrayFromJSON(type, chunk_json)); } *out = std::make_shared(std::move(out_chunks), type); return Status::OK(); } Status DictArrayFromJSON(const std::shared_ptr& type, std::string_view indices_json, std::string_view dictionary_json, std::shared_ptr* out) { if (type->id() != Type::DICTIONARY) { return Status::TypeError("DictArrayFromJSON requires dictionary type, got ", *type); } const auto& dictionary_type = checked_cast(*type); ARROW_ASSIGN_OR_RAISE(auto indices, ArrayFromJSON(dictionary_type.index_type(), indices_json)); ARROW_ASSIGN_OR_RAISE(auto dictionary, ArrayFromJSON(dictionary_type.value_type(), dictionary_json)); return DictionaryArray::FromArrays(type, std::move(indices), std::move(dictionary)) .Value(out); } Status ScalarFromJSON(const std::shared_ptr& type, std::string_view json_string, std::shared_ptr* out) { std::shared_ptr converter; RETURN_NOT_OK(GetConverter(type, &converter)); rj::Document json_doc; json_doc.Parse(json_string.data(), json_string.length()); if (json_doc.HasParseError()) { return Status::Invalid("JSON parse error at offset ", json_doc.GetErrorOffset(), ": ", GetParseError_En(json_doc.GetParseError())); } std::shared_ptr array; RETURN_NOT_OK(converter->AppendValue(json_doc)); RETURN_NOT_OK(converter->Finish(&array)); DCHECK_EQ(array->length(), 1); return array->GetScalar(0).Value(out); } Status DictScalarFromJSON(const std::shared_ptr& type, std::string_view index_json, std::string_view dictionary_json, std::shared_ptr* out) { if (type->id() != Type::DICTIONARY) { return Status::TypeError("DictScalarFromJSON requires dictionary type, got ", *type); } const auto& dictionary_type = checked_cast(*type); std::shared_ptr index; std::shared_ptr dictionary; RETURN_NOT_OK(ScalarFromJSON(dictionary_type.index_type(), index_json, &index)); ARROW_ASSIGN_OR_RAISE(dictionary, ArrayFromJSON(dictionary_type.value_type(), dictionary_json)); *out = DictionaryScalar::Make(std::move(index), std::move(dictionary)); return Status::OK(); } } // namespace json } // namespace internal } // namespace ipc } // namespace arrow20