about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: heretic <heretic@yandex-team.com> 2022-09-01 11:18:57 +0300
committer: heretic <heretic@yandex-team.com> 2022-09-01 11:18:57 +0300
commit: 8393683e8cb62468ccace14fa3379e3a4fbdde73 (patch)
tree: 4f2d32a77665019c9491d34dbe1cc5e605bb220c
parent: 836e587fc927c87149f8f0b2676d2587e6a79111 (diff)
download: ydb-8393683e8cb62468ccace14fa3379e3a4fbdde73.tar.gz
add apache arrow python
-rw-r--r--contrib/libs/apache/arrow/CMakeLists.txt16
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/filesystem/filesystem.cc761
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/filesystem/filesystem.h532
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/filesystem/localfs.cc448
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/filesystem/localfs.h113
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/filesystem/mockfs.cc780
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/filesystem/mockfs.h132
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/filesystem/path_util.cc271
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/filesystem/path_util.h130
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/filesystem/type_fwd.h49
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/filesystem/util_internal.cc73
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/filesystem/util_internal.h56
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/ipc/json_simple.cc940
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/chunked_builder.cc469
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/chunked_builder.h68
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/chunker.cc186
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/chunker.h35
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/converter.cc323
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/converter.h94
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/object_parser.cc83
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/object_parser.h49
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/object_writer.cc82
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/object_writer.h48
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/options.cc28
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/options.h74
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/parser.cc1099
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/parser.h101
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/rapidjson_defs.h43
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/reader.cc227
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/reader.h72
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/json/type_fwd.h26
-rw-r--r--contrib/libs/apache/arrow/cpp/src/arrow/util/bitset_stack.h89
-rw-r--r--contrib/libs/apache/arrow/src/arrow/util/config.h6
-rw-r--r--contrib/libs/rapidjson/include/rapidjson/document.h2602
-rw-r--r--contrib/libs/rapidjson/include/rapidjson/internal/dtoa.h245
-rw-r--r--contrib/libs/rapidjson/include/rapidjson/internal/itoa.h304
-rw-r--r--contrib/libs/rapidjson/include/rapidjson/internal/strfunc.h69
-rw-r--r--contrib/libs/rapidjson/include/rapidjson/stringbuffer.h121
-rw-r--r--contrib/libs/rapidjson/include/rapidjson/writer.h640
39 files changed, 11481 insertions, 3 deletions
diff --git a/contrib/libs/apache/arrow/CMakeLists.txt b/contrib/libs/apache/arrow/CMakeLists.txt
index eb1eebea7e..bae344e8b2 100644
--- a/contrib/libs/apache/arrow/CMakeLists.txt
+++ b/contrib/libs/apache/arrow/CMakeLists.txt
@@ -38,6 +38,7 @@ target_include_directories(libs-apache-arrow PRIVATE
${CMAKE_SOURCE_DIR}/contrib/libs/apache/orc/c++/include
${CMAKE_SOURCE_DIR}/contrib/libs/flatbuffers/include
${CMAKE_SOURCE_DIR}/contrib/libs/lz4
+ ${CMAKE_SOURCE_DIR}/contrib/libs/rapidjson/include
${CMAKE_SOURCE_DIR}/contrib/libs/re2
${CMAKE_SOURCE_DIR}/contrib/libs/utf8proc
${CMAKE_SOURCE_DIR}/contrib/libs/zstd/include
@@ -50,6 +51,7 @@ target_link_libraries(libs-apache-arrow PUBLIC
libs-brotli-enc
contrib-libs-double-conversion
contrib-libs-lz4
+ contrib-libs-rapidjson
contrib-libs-re2
contrib-libs-snappy
contrib-libs-utf8proc
@@ -146,6 +148,11 @@ target_sources(libs-apache-arrow PRIVATE
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/datum.cc
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/device.cc
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/extension_type.cc
+ ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/filesystem.cc
+ ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/localfs.cc
+ ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/mockfs.cc
+ ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/path_util.cc
+ ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/util_internal.cc
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/io/buffered.cc
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/io/caching.cc
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/io/compressed.cc
@@ -157,11 +164,20 @@ target_sources(libs-apache-arrow PRIVATE
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/io/transform.cc
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/dictionary.cc
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/feather.cc
+ ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/json_simple.cc
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/message.cc
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/metadata_internal.cc
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/options.cc
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/reader.cc
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/ipc/writer.cc
+ ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/chunked_builder.cc
+ ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/chunker.cc
+ ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/converter.cc
+ ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/object_parser.cc
+ ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/object_writer.cc
+ ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/options.cc
+ ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/parser.cc
+ ${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/json/reader.cc
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/memory_pool.cc
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/pretty_print.cc
${CMAKE_SOURCE_DIR}/contrib/libs/apache/arrow/cpp/src/arrow/record_batch.cc
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/filesystem.cc b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/filesystem.cc
new file mode 100644
index 0000000000..4f44e24ba6
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/filesystem.cc
@@ -0,0 +1,761 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <sstream>
+#include <utility>
+
+#include "arrow/util/config.h"
+
+#include "arrow/filesystem/filesystem.h"
+#ifdef ARROW_HDFS
+#error #include "arrow/filesystem/hdfs.h"
+#endif
+#ifdef ARROW_S3
+#error #include "arrow/filesystem/s3fs.h"
+#endif
+#include "arrow/filesystem/localfs.h"
+#include "arrow/filesystem/mockfs.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/slow.h"
+#include "arrow/io/util_internal.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/parallel.h"
+#include "arrow/util/uri.h"
+#include "arrow/util/vector.h"
+#include "arrow/util/windows_fixup.h"
+
+namespace arrow {
+
+using internal::checked_pointer_cast;
+using internal::TaskHints;
+using internal::Uri;
+using io::internal::SubmitIO;
+
+namespace fs {
+
+using internal::ConcatAbstractPath;
+using internal::EnsureTrailingSlash;
+using internal::GetAbstractPathParent;
+using internal::kSep;
+using internal::RemoveLeadingSlash;
+using internal::RemoveTrailingSlash;
+using internal::ToSlashes;
+
+/// \brief Return a short lowercase label for a FileType value.
+///
+/// Values that match no known enumerator are reported through
+/// ARROW_LOG(FATAL); "???" is the result written after that log statement.
+std::string ToString(FileType ftype) {
+  const char* label = "???";
+  switch (ftype) {
+    case FileType::NotFound:
+      label = "not-found";
+      break;
+    case FileType::Unknown:
+      label = "unknown";
+      break;
+    case FileType::File:
+      label = "file";
+      break;
+    case FileType::Directory:
+      label = "directory";
+      break;
+    default:
+      ARROW_LOG(FATAL) << "Invalid FileType value: " << static_cast<int>(ftype);
+      break;
+  }
+  return label;
+}
+
+// For googletest
+// Streams a FileType into `os` as "FileType::" followed by the enumerator
+// name. A value matching none of the four cases is reported through
+// ARROW_LOG(FATAL) and nothing is written to `os`. Returns `os`.
+ARROW_EXPORT std::ostream& operator<<(std::ostream& os, FileType ftype) {
+// Local helper: expands to one `case` label that writes the enumerator's
+// name (presumably ARROW_STRINGIFY turns its argument into a string literal
+// -- confirm in arrow/util/macros.h).
+#define FILE_TYPE_CASE(value_name) \
+ case FileType::value_name: \
+ os << "FileType::" ARROW_STRINGIFY(value_name); \
+ break;
+
+ switch (ftype) {
+ FILE_TYPE_CASE(NotFound)
+ FILE_TYPE_CASE(Unknown)
+ FILE_TYPE_CASE(File)
+ FILE_TYPE_CASE(Directory)
+ default:
+ ARROW_LOG(FATAL) << "Invalid FileType value: " << static_cast<int>(ftype);
+ }
+
+// The helper macro is only meaningful inside this function.
+#undef FILE_TYPE_CASE
+ return os;
+}
+
+/// \brief Base name of this entry: the `second` element of
+/// internal::GetAbstractPathParent(path_).
+std::string FileInfo::base_name() const {
+  auto split = internal::GetAbstractPathParent(path_);
+  return std::move(split.second);
+}
+
+/// \brief Directory name of this entry: the `first` element of
+/// internal::GetAbstractPathParent(path_).
+std::string FileInfo::dir_name() const {
+  auto split = internal::GetAbstractPathParent(path_);
+  return std::move(split.first);
+}
+
+// Debug helper: renders this entry with the FileInfo stream insertion
+// operator and returns the resulting text.
+std::string FileInfo::ToString() const {
+  std::stringstream rendered;
+  rendered << *this;
+  return rendered.str();
+}
+
+/// \brief Write `info` as "FileInfo(<type>, <path>)" and return the stream.
+std::ostream& operator<<(std::ostream& os, const FileInfo& info) {
+  os << "FileInfo(" << info.type() << ", " << info.path() << ")";
+  return os;
+}
+
+/// \brief Extension of this entry, as computed by
+/// internal::GetAbstractPathExtension(path_).
+std::string FileInfo::extension() const {
+  std::string ext = internal::GetAbstractPathExtension(path_);
+  return ext;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// FileSystem default method implementations
+
+// Out-of-line destructor definition; no custom teardown is performed.
+FileSystem::~FileSystem() = default;
+
+/// \brief Default normalization: the path is returned unchanged.
+Result<std::string> FileSystem::NormalizePath(std::string path) {
+  return path;
+}
+
+/// \brief Look up every path in `paths`, one at a time and in order.
+///
+/// Returns the infos in the same order as `paths`. The first failing
+/// single-path lookup aborts the loop and its error is returned.
+Result<std::vector<FileInfo>> FileSystem::GetFileInfo(
+    const std::vector<std::string>& paths) {
+  std::vector<FileInfo> infos;
+  infos.reserve(paths.size());
+  for (auto it = paths.begin(); it != paths.end(); ++it) {
+    ARROW_ASSIGN_OR_RAISE(FileInfo entry, GetFileInfo(*it));
+    infos.emplace_back(std::move(entry));
+  }
+  return infos;
+}
+
+namespace {
+
+// Invoke `func` with a shared_ptr to `fs` (obtained via shared_from_this()).
+//
+// - synchronous == true: `func` is called inline on the calling thread and
+//   its result is converted to the declared return type.
+// - synchronous == false: `func` is submitted through io::internal::SubmitIO
+//   on fs->io_context(), and the submission result is passed to DeferNotOk.
+//
+// The return type is whatever DeferNotOk yields for a task submitted on the
+// filesystem's executor with a shared_ptr<FileSystem> argument.
+template <typename DeferredFunc>
+auto FileSystemDefer(FileSystem* fs, bool synchronous, DeferredFunc&& func)
+ -> decltype(DeferNotOk(
+ fs->io_context().executor()->Submit(func, std::shared_ptr<FileSystem>{}))) {
+ // The shared_ptr is handed to `func` in both branches.
+ auto self = fs->shared_from_this();
+ if (synchronous) {
+ return std::forward<DeferredFunc>(func)(std::move(self));
+ }
+ return DeferNotOk(io::internal::SubmitIO(
+ fs->io_context(), std::forward<DeferredFunc>(func), std::move(self)));
+}
+
+} // namespace
+
+/// \brief Asynchronous form of GetFileInfo(paths).
+///
+/// `paths` is copied into the task; the task runs inline or on the IO
+/// executor depending on default_async_is_sync_ (see FileSystemDefer).
+Future<std::vector<FileInfo>> FileSystem::GetFileInfoAsync(
+    const std::vector<std::string>& paths) {
+  auto lookup_task = [paths](std::shared_ptr<FileSystem> self) {
+    return self->GetFileInfo(paths);
+  };
+  return FileSystemDefer(this, default_async_is_sync_, std::move(lookup_task));
+}
+
+/// \brief Default generator: wraps a single deferred GetFileInfo(select)
+/// call in a generator via MakeSingleFutureGenerator.
+FileInfoGenerator FileSystem::GetFileInfoGenerator(const FileSelector& select) {
+  auto list_task = [select](std::shared_ptr<FileSystem> self) {
+    return self->GetFileInfo(select);
+  };
+  auto listing = FileSystemDefer(this, default_async_is_sync_, std::move(list_task));
+  return MakeSingleFutureGenerator(std::move(listing));
+}
+
+/// \brief Delete every path in `paths` by calling DeleteFile on each.
+///
+/// All paths are attempted even if an earlier one fails; the individual
+/// statuses are folded together with Status::operator&=.
+Status FileSystem::DeleteFiles(const std::vector<std::string>& paths) {
+  Status combined = Status::OK();
+  for (const std::string& target : paths) {
+    combined &= DeleteFile(target);
+  }
+  return combined;
+}
+
+namespace {
+
+// Check that `info` may be opened for reading.
+//   NotFound          -> internal::PathNotFound(path)
+//   File or Unknown   -> OK
+//   anything else     -> internal::NotAFile(path)
+Status ValidateInputFileInfo(const FileInfo& info) {
+  switch (info.type()) {
+    case FileType::NotFound:
+      return internal::PathNotFound(info.path());
+    case FileType::File:
+    case FileType::Unknown:
+      return Status::OK();
+    default:
+      return internal::NotAFile(info.path());
+  }
+}
+
+} // namespace
+
+/// \brief Open an input stream for the entry described by `info`.
+///
+/// The info is validated first (see ValidateInputFileInfo); on success the
+/// call is forwarded to the path-based overload.
+Result<std::shared_ptr<io::InputStream>> FileSystem::OpenInputStream(
+    const FileInfo& info) {
+  RETURN_NOT_OK(ValidateInputFileInfo(info));
+  const std::string& target = info.path();
+  return OpenInputStream(target);
+}
+
+/// \brief Open a random-access file for the entry described by `info`.
+///
+/// The info is validated first (see ValidateInputFileInfo); on success the
+/// call is forwarded to the path-based overload.
+Result<std::shared_ptr<io::RandomAccessFile>> FileSystem::OpenInputFile(
+    const FileInfo& info) {
+  RETURN_NOT_OK(ValidateInputFileInfo(info));
+  const std::string& target = info.path();
+  return OpenInputFile(target);
+}
+
+/// \brief Asynchronous form of OpenInputStream(path); `path` is copied into
+/// the deferred task.
+Future<std::shared_ptr<io::InputStream>> FileSystem::OpenInputStreamAsync(
+    const std::string& path) {
+  auto open_task = [path](std::shared_ptr<FileSystem> self) {
+    return self->OpenInputStream(path);
+  };
+  return FileSystemDefer(this, default_async_is_sync_, std::move(open_task));
+}
+
+/// \brief Asynchronous form of OpenInputStream(info).
+///
+/// Validation happens immediately on the calling thread; only the open
+/// itself is deferred. `info` is copied into the task.
+Future<std::shared_ptr<io::InputStream>> FileSystem::OpenInputStreamAsync(
+    const FileInfo& info) {
+  RETURN_NOT_OK(ValidateInputFileInfo(info));
+  auto open_task = [info](std::shared_ptr<FileSystem> self) {
+    return self->OpenInputStream(info);
+  };
+  return FileSystemDefer(this, default_async_is_sync_, std::move(open_task));
+}
+
+/// \brief Asynchronous form of OpenInputFile(path); `path` is copied into
+/// the deferred task.
+Future<std::shared_ptr<io::RandomAccessFile>> FileSystem::OpenInputFileAsync(
+    const std::string& path) {
+  auto open_task = [path](std::shared_ptr<FileSystem> self) {
+    return self->OpenInputFile(path);
+  };
+  return FileSystemDefer(this, default_async_is_sync_, std::move(open_task));
+}
+
+/// \brief Asynchronous form of OpenInputFile(info).
+///
+/// Validation happens immediately on the calling thread; only the open
+/// itself is deferred. `info` is copied into the task.
+Future<std::shared_ptr<io::RandomAccessFile>> FileSystem::OpenInputFileAsync(
+    const FileInfo& info) {
+  RETURN_NOT_OK(ValidateInputFileInfo(info));
+  auto open_task = [info](std::shared_ptr<FileSystem> self) {
+    return self->OpenInputFile(info);
+  };
+  return FileSystemDefer(this, default_async_is_sync_, std::move(open_task));
+}
+
+/// \brief Convenience overload: open an output stream with a null metadata
+/// pointer.
+Result<std::shared_ptr<io::OutputStream>> FileSystem::OpenOutputStream(
+    const std::string& path) {
+  const std::shared_ptr<const KeyValueMetadata> no_metadata;
+  return OpenOutputStream(path, no_metadata);
+}
+
+/// \brief Convenience overload: open an append stream with a null metadata
+/// pointer.
+Result<std::shared_ptr<io::OutputStream>> FileSystem::OpenAppendStream(
+    const std::string& path) {
+  const std::shared_ptr<const KeyValueMetadata> no_metadata;
+  return OpenAppendStream(path, no_metadata);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// SubTreeFileSystem implementation
+
+// Build a view of `base_fs` rooted at `base_path`.
+//
+// The IO context is taken from `base_fs`. `base_path` is normalized through
+// NormalizeBasePath() before being stored; the result is unwrapped with
+// ValueOrDie(), so a normalization failure is not reported as a Status here
+// (NOTE(review): presumably this aborts -- confirm against Result::ValueOrDie).
+// The initializers use the `base_fs` parameter, not the base_fs_ member.
+SubTreeFileSystem::SubTreeFileSystem(const std::string& base_path,
+ std::shared_ptr<FileSystem> base_fs)
+ : FileSystem(base_fs->io_context()),
+ base_path_(NormalizeBasePath(base_path, base_fs).ValueOrDie()),
+ base_fs_(base_fs) {}
+
+// Out-of-line destructor definition; no custom teardown is performed.
+SubTreeFileSystem::~SubTreeFileSystem() = default;
+
+/// \brief Normalize `base_path` with `base_fs` and pass the result through
+/// EnsureTrailingSlash.
+///
+/// Errors from base_fs->NormalizePath() are propagated.
+Result<std::string> SubTreeFileSystem::NormalizeBasePath(
+    std::string base_path, const std::shared_ptr<FileSystem>& base_fs) {
+  ARROW_ASSIGN_OR_RAISE(std::string normalized,
+                        base_fs->NormalizePath(std::move(base_path)));
+  return EnsureTrailingSlash(std::move(normalized));
+}
+
+/// \brief Two subtree filesystems are equal when they are the same object,
+/// or when they share the type name, the base path and an equal base
+/// filesystem.
+bool SubTreeFileSystem::Equals(const FileSystem& other) const {
+  if (this == &other) return true;
+  if (other.type_name() != type_name()) return false;
+
+  // The type names match, so `other` is treated as a SubTreeFileSystem.
+  const auto& rhs = ::arrow::internal::checked_cast<const SubTreeFileSystem&>(other);
+  if (base_path_ != rhs.base_path_) return false;
+  return base_fs_->Equals(rhs.base_fs_);
+}
+
+/// \brief Map a subtree-relative path to a path in the base filesystem.
+///
+/// An empty `s` maps to base_path_ itself; anything else is joined onto
+/// base_path_ with ConcatAbstractPath.
+std::string SubTreeFileSystem::PrependBase(const std::string& s) const {
+  if (!s.empty()) {
+    return ConcatAbstractPath(base_path_, s);
+  }
+  return base_path_;
+}
+
+/// \brief In-place variant of PrependBase that rejects empty paths.
+///
+/// On success `*s` is replaced by base_path_ joined with its previous value.
+/// An empty `*s` yields IOError("Empty path") and leaves `*s` untouched.
+Status SubTreeFileSystem::PrependBaseNonEmpty(std::string* s) const {
+  if (s->empty()) {
+    return Status::IOError("Empty path");
+  }
+  *s = ConcatAbstractPath(base_path_, *s);
+  return Status::OK();
+}
+
+/// \brief Inverse of PrependBase: remove the leading base_path_ from `s`.
+///
+/// Returns UnknownError when `s` does not start with base_path_.
+Result<std::string> SubTreeFileSystem::StripBase(const std::string& s) const {
+  // Note base_path_ ends with a slash (if not empty)
+  const auto base_len = base_path_.length();
+  const bool has_base_prefix =
+      s.length() >= base_len && s.compare(0, base_len, base_path_) == 0;
+  if (!has_base_prefix) {
+    return Status::UnknownError("Underlying filesystem returned path '", s,
+                                "', which is not a subpath of '", base_path_, "'");
+  }
+  return s.substr(base_len);
+}
+
+/// \brief Rewrite the path stored in `*info` so it is relative to the
+/// subtree (base_path_ stripped). Propagates the StripBase error, if any.
+Status SubTreeFileSystem::FixInfo(FileInfo* info) const {
+  ARROW_ASSIGN_OR_RAISE(auto relative_path, StripBase(info->path()));
+  info->set_path(std::move(relative_path));
+  return Status::OK();
+}
+
+/// \brief Normalize `path` using the base filesystem: prepend the base,
+/// normalize there, then strip the base from the result again.
+Result<std::string> SubTreeFileSystem::NormalizePath(std::string path) {
+  std::string prefixed = PrependBase(path);
+  ARROW_ASSIGN_OR_RAISE(auto normalized,
+                        base_fs_->NormalizePath(std::move(prefixed)));
+  return StripBase(normalized);
+}
+
+/// \brief Look up `path` in the base filesystem (with the base prepended)
+/// and return the info with its path made subtree-relative again.
+Result<FileInfo> SubTreeFileSystem::GetFileInfo(const std::string& path) {
+  const std::string full_path = PrependBase(path);
+  ARROW_ASSIGN_OR_RAISE(FileInfo entry, base_fs_->GetFileInfo(full_path));
+  RETURN_NOT_OK(FixInfo(&entry));
+  return entry;
+}
+
+/// \brief Selector-based listing inside the subtree.
+///
+/// A copy of `select` with base_dir rebased onto base_path_ is sent to the
+/// base filesystem; every returned path is then made subtree-relative.
+Result<std::vector<FileInfo>> SubTreeFileSystem::GetFileInfo(const FileSelector& select) {
+  FileSelector rebased = select;
+  rebased.base_dir = PrependBase(rebased.base_dir);
+  ARROW_ASSIGN_OR_RAISE(auto entries, base_fs_->GetFileInfo(rebased));
+  for (auto& entry : entries) {
+    RETURN_NOT_OK(FixInfo(&entry));
+  }
+  return entries;
+}
+
+// Generator-based listing inside the subtree.
+//
+// The selector's base_dir is rebased onto base_path_ and handed to the base
+// filesystem's generator; each batch it produces is mapped through
+// `fix_infos`, which makes every path subtree-relative again (or yields the
+// FixInfo error).
+FileInfoGenerator SubTreeFileSystem::GetFileInfoGenerator(const FileSelector& select) {
+ auto selector = select;
+ selector.base_dir = PrependBase(selector.base_dir);
+ auto gen = base_fs_->GetFileInfoGenerator(selector);
+
+ // The mapping callback captures a shared_ptr to this filesystem.
+ auto self = checked_pointer_cast<SubTreeFileSystem>(shared_from_this());
+
+ // The lambda takes the batch by value so it can edit the infos in place;
+ // the std::function signature takes it by const reference.
+ std::function<Result<std::vector<FileInfo>>(const std::vector<FileInfo>& infos)>
+ fix_infos = [self](std::vector<FileInfo> infos) -> Result<std::vector<FileInfo>> {
+ for (auto& info : infos) {
+ RETURN_NOT_OK(self->FixInfo(&info));
+ }
+ return infos;
+ };
+ return MakeMappedGenerator(gen, fix_infos);
+}
+
+/// \brief Create a directory inside the subtree; an empty `path` is rejected.
+Status SubTreeFileSystem::CreateDir(const std::string& path, bool recursive) {
+  std::string full_path = path;
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->CreateDir(full_path, recursive);
+}
+
+/// \brief Delete a directory inside the subtree; an empty `path` is rejected.
+Status SubTreeFileSystem::DeleteDir(const std::string& path) {
+  std::string full_path = path;
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->DeleteDir(full_path);
+}
+
+/// \brief Delete the contents of a directory inside the subtree.
+///
+/// Paths for which internal::IsEmptyPath() is true are refused with
+/// internal::InvalidDeleteDirContents.
+Status SubTreeFileSystem::DeleteDirContents(const std::string& path) {
+  const bool targets_root = internal::IsEmptyPath(path);
+  if (targets_root) {
+    return internal::InvalidDeleteDirContents(path);
+  }
+  const std::string full_path = PrependBase(path);
+  return base_fs_->DeleteDirContents(full_path);
+}
+
+/// \brief Delete everything under the subtree root.
+///
+/// With a non-empty base path this is DeleteDirContents(base_path_) on the
+/// base filesystem; with an empty one it is the base filesystem's own
+/// DeleteRootDirContents().
+Status SubTreeFileSystem::DeleteRootDirContents() {
+  if (!base_path_.empty()) {
+    return base_fs_->DeleteDirContents(base_path_);
+  }
+  return base_fs_->DeleteRootDirContents();
+}
+
+/// \brief Delete a file inside the subtree; an empty `path` is rejected.
+Status SubTreeFileSystem::DeleteFile(const std::string& path) {
+  std::string full_path = path;
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->DeleteFile(full_path);
+}
+
+/// \brief Move `src` to `dest`, both interpreted inside the subtree.
+///
+/// The source path is checked first, then the destination; an empty path
+/// in either position is rejected.
+Status SubTreeFileSystem::Move(const std::string& src, const std::string& dest) {
+  std::string full_src = src;
+  std::string full_dest = dest;
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_src));
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_dest));
+  return base_fs_->Move(full_src, full_dest);
+}
+
+/// \brief Copy `src` to `dest`, both interpreted inside the subtree.
+///
+/// The source path is checked first, then the destination; an empty path
+/// in either position is rejected.
+Status SubTreeFileSystem::CopyFile(const std::string& src, const std::string& dest) {
+  std::string full_src = src;
+  std::string full_dest = dest;
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_src));
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_dest));
+  return base_fs_->CopyFile(full_src, full_dest);
+}
+
+/// \brief Open an input stream for a subtree-relative, non-empty `path`.
+Result<std::shared_ptr<io::InputStream>> SubTreeFileSystem::OpenInputStream(
+    const std::string& path) {
+  std::string full_path = path;
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->OpenInputStream(full_path);
+}
+
+/// \brief Open an input stream for the entry described by `info`.
+///
+/// A copy of `info` with its path rebased onto base_path_ is forwarded to
+/// the base filesystem; an empty path is rejected.
+Result<std::shared_ptr<io::InputStream>> SubTreeFileSystem::OpenInputStream(
+    const FileInfo& info) {
+  std::string full_path = info.path();
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  FileInfo rebased(info);
+  rebased.set_path(std::move(full_path));
+  return base_fs_->OpenInputStream(rebased);
+}
+
+/// \brief Asynchronously open an input stream for a subtree-relative,
+/// non-empty `path`; the async work is delegated to the base filesystem.
+Future<std::shared_ptr<io::InputStream>> SubTreeFileSystem::OpenInputStreamAsync(
+    const std::string& path) {
+  std::string full_path = path;
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->OpenInputStreamAsync(full_path);
+}
+
+/// \brief Asynchronously open an input stream for the entry in `info`.
+///
+/// A copy of `info` with its path rebased onto base_path_ is forwarded to
+/// the base filesystem; an empty path is rejected.
+Future<std::shared_ptr<io::InputStream>> SubTreeFileSystem::OpenInputStreamAsync(
+    const FileInfo& info) {
+  std::string full_path = info.path();
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  FileInfo rebased(info);
+  rebased.set_path(std::move(full_path));
+  return base_fs_->OpenInputStreamAsync(rebased);
+}
+
+/// \brief Open a random-access file for a subtree-relative, non-empty `path`.
+Result<std::shared_ptr<io::RandomAccessFile>> SubTreeFileSystem::OpenInputFile(
+    const std::string& path) {
+  std::string full_path = path;
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->OpenInputFile(full_path);
+}
+
+/// \brief Open a random-access file for the entry described by `info`.
+///
+/// A copy of `info` with its path rebased onto base_path_ is forwarded to
+/// the base filesystem; an empty path is rejected.
+Result<std::shared_ptr<io::RandomAccessFile>> SubTreeFileSystem::OpenInputFile(
+    const FileInfo& info) {
+  std::string full_path = info.path();
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  FileInfo rebased(info);
+  rebased.set_path(std::move(full_path));
+  return base_fs_->OpenInputFile(rebased);
+}
+
+/// \brief Asynchronously open a random-access file for a subtree-relative,
+/// non-empty `path`; the async work is delegated to the base filesystem.
+Future<std::shared_ptr<io::RandomAccessFile>> SubTreeFileSystem::OpenInputFileAsync(
+    const std::string& path) {
+  std::string full_path = path;
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->OpenInputFileAsync(full_path);
+}
+
+/// \brief Asynchronously open a random-access file for the entry in `info`.
+///
+/// A copy of `info` with its path rebased onto base_path_ is forwarded to
+/// the base filesystem; an empty path is rejected.
+Future<std::shared_ptr<io::RandomAccessFile>> SubTreeFileSystem::OpenInputFileAsync(
+    const FileInfo& info) {
+  std::string full_path = info.path();
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  FileInfo rebased(info);
+  rebased.set_path(std::move(full_path));
+  return base_fs_->OpenInputFileAsync(rebased);
+}
+
+/// \brief Open an output stream for a subtree-relative, non-empty `path`,
+/// forwarding `metadata` unchanged to the base filesystem.
+Result<std::shared_ptr<io::OutputStream>> SubTreeFileSystem::OpenOutputStream(
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+  std::string full_path = path;
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->OpenOutputStream(full_path, metadata);
+}
+
+/// \brief Open an append stream for a subtree-relative, non-empty `path`,
+/// forwarding `metadata` unchanged to the base filesystem.
+Result<std::shared_ptr<io::OutputStream>> SubTreeFileSystem::OpenAppendStream(
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+  std::string full_path = path;
+  RETURN_NOT_OK(PrependBaseNonEmpty(&full_path));
+  return base_fs_->OpenAppendStream(full_path, metadata);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// SlowFileSystem implementation
+
+// Wrap `base_fs`, using the caller-provided latency generator for the
+// artificial delays. The IO context is taken from `base_fs`.
+SlowFileSystem::SlowFileSystem(std::shared_ptr<FileSystem> base_fs,
+ std::shared_ptr<io::LatencyGenerator> latencies)
+ : FileSystem(base_fs->io_context()), base_fs_(base_fs), latencies_(latencies) {}
+
+// Wrap `base_fs` with a latency generator built by
+// io::LatencyGenerator::Make(average_latency). The IO context is taken from
+// `base_fs`.
+SlowFileSystem::SlowFileSystem(std::shared_ptr<FileSystem> base_fs,
+ double average_latency)
+ : FileSystem(base_fs->io_context()),
+ base_fs_(base_fs),
+ latencies_(io::LatencyGenerator::Make(average_latency)) {}
+
+// Wrap `base_fs` with a latency generator built by
+// io::LatencyGenerator::Make(average_latency, seed). The IO context is taken
+// from `base_fs`.
+SlowFileSystem::SlowFileSystem(std::shared_ptr<FileSystem> base_fs,
+ double average_latency, int32_t seed)
+ : FileSystem(base_fs->io_context()),
+ base_fs_(base_fs),
+ latencies_(io::LatencyGenerator::Make(average_latency, seed)) {}
+
+/// \brief Identity comparison: a SlowFileSystem only equals itself.
+bool SlowFileSystem::Equals(const FileSystem& other) const {
+  const FileSystem* other_address = &other;
+  return this == other_address;
+}
+
+/// \brief Sleep for one generated latency, then forward to the base
+/// filesystem.
+Result<FileInfo> SlowFileSystem::GetFileInfo(const std::string& path) {
+  latencies_->Sleep();
+  auto outcome = base_fs_->GetFileInfo(path);
+  return outcome;
+}
+
+/// \brief Sleep for one generated latency, then forward the selector to the
+/// base filesystem.
+Result<std::vector<FileInfo>> SlowFileSystem::GetFileInfo(const FileSelector& selector) {
+  latencies_->Sleep();
+  auto outcome = base_fs_->GetFileInfo(selector);
+  return outcome;
+}
+
+/// \brief Sleep for one generated latency, then forward to the base
+/// filesystem.
+Status SlowFileSystem::CreateDir(const std::string& path, bool recursive) {
+  latencies_->Sleep();
+  Status outcome = base_fs_->CreateDir(path, recursive);
+  return outcome;
+}
+
+/// \brief Sleep for one generated latency, then forward to the base
+/// filesystem.
+Status SlowFileSystem::DeleteDir(const std::string& path) {
+  latencies_->Sleep();
+  Status outcome = base_fs_->DeleteDir(path);
+  return outcome;
+}
+
+/// \brief Sleep for one generated latency, then forward to the base
+/// filesystem.
+Status SlowFileSystem::DeleteDirContents(const std::string& path) {
+  latencies_->Sleep();
+  Status outcome = base_fs_->DeleteDirContents(path);
+  return outcome;
+}
+
+/// \brief Sleep for one generated latency, then forward to the base
+/// filesystem.
+Status SlowFileSystem::DeleteRootDirContents() {
+  latencies_->Sleep();
+  Status outcome = base_fs_->DeleteRootDirContents();
+  return outcome;
+}
+
+/// \brief Sleep for one generated latency, then forward to the base
+/// filesystem.
+Status SlowFileSystem::DeleteFile(const std::string& path) {
+  latencies_->Sleep();
+  Status outcome = base_fs_->DeleteFile(path);
+  return outcome;
+}
+
+/// \brief Sleep for one generated latency, then forward to the base
+/// filesystem.
+Status SlowFileSystem::Move(const std::string& src, const std::string& dest) {
+  latencies_->Sleep();
+  Status outcome = base_fs_->Move(src, dest);
+  return outcome;
+}
+
+/// \brief Sleep for one generated latency, then forward to the base
+/// filesystem.
+Status SlowFileSystem::CopyFile(const std::string& src, const std::string& dest) {
+  latencies_->Sleep();
+  Status outcome = base_fs_->CopyFile(src, dest);
+  return outcome;
+}
+
+/// \brief Sleep, open `path` on the base filesystem, and wrap the stream in
+/// an io::SlowInputStream that shares this filesystem's latency generator.
+Result<std::shared_ptr<io::InputStream>> SlowFileSystem::OpenInputStream(
+    const std::string& path) {
+  latencies_->Sleep();
+  ARROW_ASSIGN_OR_RAISE(auto inner, base_fs_->OpenInputStream(path));
+  std::shared_ptr<io::InputStream> slowed =
+      std::make_shared<io::SlowInputStream>(inner, latencies_);
+  return slowed;
+}
+
+/// \brief Sleep, open `info` on the base filesystem, and wrap the stream in
+/// an io::SlowInputStream that shares this filesystem's latency generator.
+Result<std::shared_ptr<io::InputStream>> SlowFileSystem::OpenInputStream(
+    const FileInfo& info) {
+  latencies_->Sleep();
+  ARROW_ASSIGN_OR_RAISE(auto inner, base_fs_->OpenInputStream(info));
+  std::shared_ptr<io::InputStream> slowed =
+      std::make_shared<io::SlowInputStream>(inner, latencies_);
+  return slowed;
+}
+
+/// \brief Sleep, open `path` on the base filesystem, and wrap the file in an
+/// io::SlowRandomAccessFile that shares this filesystem's latency generator.
+Result<std::shared_ptr<io::RandomAccessFile>> SlowFileSystem::OpenInputFile(
+    const std::string& path) {
+  latencies_->Sleep();
+  ARROW_ASSIGN_OR_RAISE(auto inner, base_fs_->OpenInputFile(path));
+  std::shared_ptr<io::RandomAccessFile> slowed =
+      std::make_shared<io::SlowRandomAccessFile>(inner, latencies_);
+  return slowed;
+}
+
+/// \brief Sleep, open `info` on the base filesystem, and wrap the file in an
+/// io::SlowRandomAccessFile that shares this filesystem's latency generator.
+Result<std::shared_ptr<io::RandomAccessFile>> SlowFileSystem::OpenInputFile(
+    const FileInfo& info) {
+  latencies_->Sleep();
+  ARROW_ASSIGN_OR_RAISE(auto inner, base_fs_->OpenInputFile(info));
+  std::shared_ptr<io::RandomAccessFile> slowed =
+      std::make_shared<io::SlowRandomAccessFile>(inner, latencies_);
+  return slowed;
+}
+
+/// \brief Sleep for one generated latency, then open the output stream on
+/// the base filesystem. The returned stream itself is not slowed down.
+Result<std::shared_ptr<io::OutputStream>> SlowFileSystem::OpenOutputStream(
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+  latencies_->Sleep();
+  // XXX Should we have a SlowOutputStream that waits on Flush() and Close()?
+  auto outcome = base_fs_->OpenOutputStream(path, metadata);
+  return outcome;
+}
+
+/// \brief Sleep for one generated latency, then open the append stream on
+/// the base filesystem. The returned stream itself is not slowed down.
+Result<std::shared_ptr<io::OutputStream>> SlowFileSystem::OpenAppendStream(
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+  latencies_->Sleep();
+  auto outcome = base_fs_->OpenAppendStream(path, metadata);
+  return outcome;
+}
+
+// Copy sources[i] to destinations[i] for every index i.
+//
+// Returns Status::Invalid when the two vectors differ in length. The per-file
+// work is dispatched through OptionalParallelFor with `use_threads` and the
+// executor of `io_context`.
+Status CopyFiles(const std::vector<FileLocator>& sources,
+ const std::vector<FileLocator>& destinations,
+ const io::IOContext& io_context, int64_t chunk_size, bool use_threads) {
+ if (sources.size() != destinations.size()) {
+ return Status::Invalid("Trying to copy ", sources.size(), " files into ",
+ destinations.size(), " paths.");
+ }
+
+ // Copies one (source, destination) pair; returns the Status of that copy.
+ auto copy_one_file = [&](int i) {
+ // Both ends on an equal filesystem: let that filesystem do the copy.
+ if (sources[i].filesystem->Equals(destinations[i].filesystem)) {
+ return sources[i].filesystem->CopyFile(sources[i].path, destinations[i].path);
+ }
+
+ // Otherwise stream the bytes across, carrying the source's metadata over
+ // to the destination stream.
+ ARROW_ASSIGN_OR_RAISE(auto source,
+ sources[i].filesystem->OpenInputStream(sources[i].path));
+ ARROW_ASSIGN_OR_RAISE(const auto metadata, source->ReadMetadata());
+
+ ARROW_ASSIGN_OR_RAISE(auto destination, destinations[i].filesystem->OpenOutputStream(
+ destinations[i].path, metadata));
+ RETURN_NOT_OK(internal::CopyStream(source, destination, chunk_size, io_context));
+ return destination->Close();
+ };
+
+ return ::arrow::internal::OptionalParallelFor(
+ use_threads, static_cast<int>(sources.size()), std::move(copy_one_file),
+ io_context.executor());
+}
+
+// Copy everything matched by `source_sel` on `source_fs` into
+// `destination_base_dir` on `destination_fs`.
+//
+// Each selected entry's path relative to source_sel.base_dir is re-rooted
+// under destination_base_dir. Directories are created first, then files are
+// copied with the locator-based CopyFiles overload. An empty selection is a
+// no-op. Entries that are neither directories nor files are ignored.
+Status CopyFiles(const std::shared_ptr<FileSystem>& source_fs,
+ const FileSelector& source_sel,
+ const std::shared_ptr<FileSystem>& destination_fs,
+ const std::string& destination_base_dir, const io::IOContext& io_context,
+ int64_t chunk_size, bool use_threads) {
+ ARROW_ASSIGN_OR_RAISE(auto source_infos, source_fs->GetFileInfo(source_sel));
+ if (source_infos.empty()) {
+ return Status::OK();
+ }
+
+ std::vector<FileLocator> sources, destinations;
+ std::vector<std::string> dirs;
+
+ for (const FileInfo& source_info : source_infos) {
+ // A listed path that is not below base_dir is reported as Invalid.
+ auto relative = internal::RemoveAncestor(source_sel.base_dir, source_info.path());
+ if (!relative.has_value()) {
+ return Status::Invalid("GetFileInfo() yielded path '", source_info.path(),
+ "', which is outside base dir '", source_sel.base_dir, "'");
+ }
+
+ auto destination_path =
+ internal::ConcatAbstractPath(destination_base_dir, relative->to_string());
+
+ if (source_info.IsDirectory()) {
+ dirs.push_back(destination_path);
+ } else if (source_info.IsFile()) {
+ sources.push_back({source_fs, source_info.path()});
+ destinations.push_back({destination_fs, destination_path});
+ }
+ }
+
+ // Captures `dirs` by reference, so it sees the reduced set assigned below.
+ auto create_one_dir = [&](int i) { return destination_fs->CreateDir(dirs[i]); };
+
+ // Reduce the directory list with MinimalCreateDirSet before creating them.
+ dirs = internal::MinimalCreateDirSet(std::move(dirs));
+ RETURN_NOT_OK(::arrow::internal::OptionalParallelFor(
+ use_threads, static_cast<int>(dirs.size()), std::move(create_one_dir),
+ io_context.executor()));
+
+ return CopyFiles(sources, destinations, io_context, chunk_size, use_threads);
+}
+
+namespace {
+
+// Parse `uri_string` into a Uri.
+//
+// On non-Windows builds a parse failure is returned as-is. On Windows the
+// string is re-parsed after converting it with ToSlashes(); that second
+// result is only accepted when its scheme is "file", otherwise the original
+// parse error is returned.
+Result<Uri> ParseFileSystemUri(const std::string& uri_string) {
+ Uri uri;
+ auto status = uri.Parse(uri_string);
+ if (!status.ok()) {
+#ifdef _WIN32
+ // Could be a "file:..." URI with backslashes instead of regular slashes.
+ RETURN_NOT_OK(uri.Parse(ToSlashes(uri_string)));
+ if (uri.scheme() != "file") {
+ return status;
+ }
+#else
+ return status;
+#endif
+ }
+ return std::move(uri);
+}
+
+// Instantiate the filesystem selected by the scheme of an already-parsed URI.
+//
+//   "file"            -> LocalFileSystem (options and path from the URI)
+//   "hdfs", "viewfs"  -> HadoopFileSystem, or NotImplemented without ARROW_HDFS
+//   "s3"              -> S3FileSystem, or NotImplemented without ARROW_S3
+//   "mock"            -> internal::MockFileSystem
+//   anything else     -> Status::Invalid mentioning `uri_string`
+//
+// When `out_path` is non-null it receives the path inside the filesystem.
+// `uri_string` is only used for the error message.
+Result<std::shared_ptr<FileSystem>> FileSystemFromUriReal(const Uri& uri,
+ const std::string& uri_string,
+ const io::IOContext& io_context,
+ std::string* out_path) {
+ const auto scheme = uri.scheme();
+
+ if (scheme == "file") {
+ std::string path;
+ ARROW_ASSIGN_OR_RAISE(auto options, LocalFileSystemOptions::FromUri(uri, &path));
+ if (out_path != nullptr) {
+ *out_path = path;
+ }
+ return std::make_shared<LocalFileSystem>(options, io_context);
+ }
+ if (scheme == "hdfs" || scheme == "viewfs") {
+#ifdef ARROW_HDFS
+ ARROW_ASSIGN_OR_RAISE(auto options, HdfsOptions::FromUri(uri));
+ if (out_path != nullptr) {
+ *out_path = uri.path();
+ }
+ ARROW_ASSIGN_OR_RAISE(auto hdfs, HadoopFileSystem::Make(options, io_context));
+ return hdfs;
+#else
+ return Status::NotImplemented("Got HDFS URI but Arrow compiled without HDFS support");
+#endif
+ }
+ if (scheme == "s3") {
+#ifdef ARROW_S3
+ RETURN_NOT_OK(EnsureS3Initialized());
+ // S3Options::FromUri is given out_path directly.
+ ARROW_ASSIGN_OR_RAISE(auto options, S3Options::FromUri(uri, out_path));
+ ARROW_ASSIGN_OR_RAISE(auto s3fs, S3FileSystem::Make(options, io_context));
+ return s3fs;
+#else
+ return Status::NotImplemented("Got S3 URI but Arrow compiled without S3 support");
+#endif
+ }
+
+ if (scheme == "mock") {
+ // MockFileSystem does not have an absolute / relative path distinction,
+ // normalize path by removing leading slash.
+ if (out_path != nullptr) {
+ *out_path = std::string(RemoveLeadingSlash(uri.path()));
+ }
+ return std::make_shared<internal::MockFileSystem>(internal::CurrentTimePoint(),
+ io_context);
+ }
+
+ return Status::Invalid("Unrecognized filesystem type in URI: ", uri_string);
+}
+
+} // namespace
+
+/// \brief Create a filesystem from a URI using io::default_io_context().
+///
+/// See the overload taking an explicit IOContext for the details.
+Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri_string,
+                                                      std::string* out_path) {
+  const io::IOContext& default_context = io::default_io_context();
+  return FileSystemFromUri(uri_string, default_context, out_path);
+}
+
+/// \brief Create a filesystem from a URI.
+///
+/// The string is parsed with ParseFileSystemUri() and the filesystem is
+/// selected from the URI scheme by FileSystemFromUriReal(). When `out_path`
+/// is non-null it receives the path inside the filesystem.
+Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri_string,
+                                                      const io::IOContext& io_context,
+                                                      std::string* out_path) {
+  ARROW_ASSIGN_OR_RAISE(auto parsed_uri, ParseFileSystemUri(uri_string));
+  return FileSystemFromUriReal(parsed_uri, uri_string, io_context, out_path);
+}
+
+/// \brief Create a filesystem from a URI or an absolute local path, using
+/// io::default_io_context().
+Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(const std::string& uri_string,
+                                                            std::string* out_path) {
+  const io::IOContext& default_context = io::default_io_context();
+  return FileSystemFromUriOrPath(uri_string, default_context, out_path);
+}
+
+/// \brief Create a filesystem from a URI or an absolute local path.
+///
+/// If internal::DetectAbsolutePath() accepts `uri_string`, a LocalFileSystem
+/// is returned and `*out_path` (when non-null) receives the string converted
+/// with ToSlashes(). Otherwise the string is handled by FileSystemFromUri().
+///
+/// \param uri_string  URI or absolute local path
+/// \param io_context  IOContext the created filesystem should use
+/// \param out_path    optional output: path inside the filesystem
+Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
+    const std::string& uri_string, const io::IOContext& io_context,
+    std::string* out_path) {
+  if (internal::DetectAbsolutePath(uri_string)) {
+    // Normalize path separators
+    if (out_path != nullptr) {
+      *out_path = ToSlashes(uri_string);
+    }
+    // Pass the caller's IOContext on, as the "file" URI branch of
+    // FileSystemFromUriReal does; previously it was silently dropped here
+    // and the default context was used instead.
+    return std::make_shared<LocalFileSystem>(io_context);
+  }
+  return FileSystemFromUri(uri_string, io_context, out_path);
+}
+
+/// \brief Status-returning form of FileSystemFromUri(uri, out_path).
+///
+/// On success the filesystem is stored in `*out_fs`.
+Status FileSystemFromUri(const std::string& uri, std::shared_ptr<FileSystem>* out_fs,
+                         std::string* out_path) {
+  auto maybe_fs = FileSystemFromUri(uri, out_path);
+  return std::move(maybe_fs).Value(out_fs);
+}
+
+/// \brief Store `options` in internal::global_options. Always returns OK.
+Status Initialize(const FileSystemGlobalOptions& options) {
+  internal::global_options = options;
+  const Status ok = Status::OK();
+  return ok;
+}
+
+} // namespace fs
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/filesystem.h b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/filesystem.h
new file mode 100644
index 0000000000..c739471c72
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/filesystem.h
@@ -0,0 +1,532 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <chrono>
+#include <cstdint>
+#include <functional>
+#include <iosfwd>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/filesystem/type_fwd.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/compare.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/type_fwd.h"
+#include "arrow/util/visibility.h"
+#include "arrow/util/windows_fixup.h"
+
+namespace arrow {
+namespace fs {
+
+/// A system clock time point expressed as a 64-bit (or more) number of
+/// nanoseconds since the epoch.
+using TimePoint =
+ std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;
+
+ARROW_EXPORT std::string ToString(FileType);
+
+ARROW_EXPORT std::ostream& operator<<(std::ostream& os, FileType);
+
+static const int64_t kNoSize = -1;  // sentinel: size not available (FileInfo's default size)
+static const TimePoint kNoTime = TimePoint(TimePoint::duration(-1));  // sentinel: mtime not available (FileInfo's default mtime)
+
+/// \brief FileSystem entry info
+struct ARROW_EXPORT FileInfo : public util::EqualityComparable<FileInfo> {
+ FileInfo() = default;
+ FileInfo(FileInfo&&) = default;
+ FileInfo& operator=(FileInfo&&) = default;
+ FileInfo(const FileInfo&) = default;
+ FileInfo& operator=(const FileInfo&) = default;
+
+ explicit FileInfo(std::string path, FileType type = FileType::Unknown)
+ : path_(std::move(path)), type_(type) {}
+
+ /// The file type
+ FileType type() const { return type_; }
+ void set_type(FileType type) { type_ = type; }
+
+ /// The full file path in the filesystem
+ const std::string& path() const { return path_; }
+ void set_path(std::string path) { path_ = std::move(path); }
+
+ /// The file base name (component after the last directory separator)
+ std::string base_name() const;
+
+ /// The directory base name (component before the file base name).
+ std::string dir_name() const;
+
+ /// The size in bytes, if available (kNoSize by default)
+ ///
+ /// Only regular files are guaranteed to have a size.
+ int64_t size() const { return size_; }
+ void set_size(int64_t size) { size_ = size; }
+
+ /// The file extension (excluding the dot)
+ std::string extension() const;
+
+ /// The time of last modification, if available (kNoTime by default)
+ TimePoint mtime() const { return mtime_; }
+ void set_mtime(TimePoint mtime) { mtime_ = mtime; }
+
+ bool IsFile() const { return type_ == FileType::File; }
+ bool IsDirectory() const { return type_ == FileType::Directory; }
+
+ bool Equals(const FileInfo& other) const {  // field-wise: type, path, size and mtime must all match
+ return type() == other.type() && path() == other.path() && size() == other.size() &&
+ mtime() == other.mtime();
+ }
+
+ std::string ToString() const;
+
+ /// Function object implementing less-than comparison and hashing by
+ /// path, to support sorting infos, using them as keys, and other
+ /// interactions with the STL.
+ struct ByPath {
+ bool operator()(const FileInfo& l, const FileInfo& r) const {  // ordering: compares paths only
+ return l.path() < r.path();
+ }
+
+ size_t operator()(const FileInfo& i) const {  // hashing: hashes the path only
+ return std::hash<std::string>{}(i.path());
+ }
+ };
+
+ protected:
+ std::string path_;
+ FileType type_ = FileType::Unknown;
+ int64_t size_ = kNoSize;
+ TimePoint mtime_ = kNoTime;
+};
+
+ARROW_EXPORT std::ostream& operator<<(std::ostream& os, const FileInfo&);
+
+/// \brief File selector for filesystem APIs
+///
+/// A default-constructed selector has an empty base directory, reports a
+/// missing base directory as an error, does not recurse, and has a recursion
+/// limit of INT32_MAX.
+struct ARROW_EXPORT FileSelector {
+  /// The directory in which to select files.
+  /// If the path exists but doesn't point to a directory, this should be an error.
+  std::string base_dir;
+  /// The behavior if `base_dir` isn't found in the filesystem. If false,
+  /// an error is returned. If true, an empty selection is returned.
+  bool allow_not_found = false;
+  /// Whether to recurse into subdirectories.
+  bool recursive = false;
+  /// The maximum number of subdirectories to recurse into.
+  int32_t max_recursion = INT32_MAX;
+
+  FileSelector() {}
+};
+
+/// \brief FileSystem, path pair
+struct ARROW_EXPORT FileLocator {
+ std::shared_ptr<FileSystem> filesystem;  // the filesystem half of the pair
+ std::string path;  // the path half of the pair
+};
+
+using FileInfoVector = std::vector<FileInfo>;
+using FileInfoGenerator = std::function<Future<FileInfoVector>()>;  // callable producing a Future of one FileInfoVector per call
+
+} // namespace fs
+
+/// IterationTraits for FileInfoVector: an empty vector is the end marker.
+template <>
+struct IterationTraits<fs::FileInfoVector> {
+  /// The end-of-iteration value (an empty vector).
+  static fs::FileInfoVector End() { return fs::FileInfoVector(); }
+  /// Whether `val` is the end-of-iteration value, i.e. holds no entries.
+  static bool IsEnd(const fs::FileInfoVector& val) { return val.size() == 0; }
+};
+
+namespace fs {
+
+/// \brief Abstract file system API
+class ARROW_EXPORT FileSystem : public std::enable_shared_from_this<FileSystem> {
+ public:
+ virtual ~FileSystem();
+
+ virtual std::string type_name() const = 0;
+
+ /// EXPERIMENTAL: The IOContext associated with this filesystem.
+ const io::IOContext& io_context() const { return io_context_; }
+
+ /// Normalize path for the given filesystem
+ ///
+ /// The default implementation of this method is a no-op, but subclasses
+ /// may allow normalizing irregular path forms (such as Windows local paths).
+ virtual Result<std::string> NormalizePath(std::string path);
+
+ virtual bool Equals(const FileSystem& other) const = 0;
+
+ virtual bool Equals(const std::shared_ptr<FileSystem>& other) const {
+ return Equals(*other);  // NOTE(review): `other` is dereferenced unconditionally; a null pointer is not handled here
+ }
+
+ /// Get info for the given target.
+ ///
+ /// Any symlink is automatically dereferenced, recursively.
+ /// A nonexistent or unreachable file returns an Ok status and
+ /// has a FileType of value NotFound. An error status indicates
+ /// a truly exceptional condition (low-level I/O error, etc.).
+ virtual Result<FileInfo> GetFileInfo(const std::string& path) = 0;
+ /// Same, for many targets at once.
+ virtual Result<FileInfoVector> GetFileInfo(const std::vector<std::string>& paths);
+ /// Same, according to a selector.
+ ///
+ /// The selector's base directory will not be part of the results, even if
+ /// it exists.
+ /// If it doesn't exist, see `FileSelector::allow_not_found`.
+ virtual Result<FileInfoVector> GetFileInfo(const FileSelector& select) = 0;
+
+ /// EXPERIMENTAL: async version of GetFileInfo
+ virtual Future<FileInfoVector> GetFileInfoAsync(const std::vector<std::string>& paths);
+
+ /// EXPERIMENTAL: streaming async version of GetFileInfo
+ ///
+ /// The returned generator is not async-reentrant, i.e. you need to wait for
+ /// the returned future to complete before calling the generator again.
+ virtual FileInfoGenerator GetFileInfoGenerator(const FileSelector& select);
+
+ /// Create a directory and subdirectories.
+ ///
+ /// This function succeeds if the directory already exists.
+ virtual Status CreateDir(const std::string& path, bool recursive = true) = 0;
+
+ /// Delete a directory and its contents, recursively.
+ virtual Status DeleteDir(const std::string& path) = 0;
+
+ /// Delete a directory's contents, recursively.
+ ///
+ /// Like DeleteDir, but doesn't delete the directory itself.
+ /// Passing an empty path ("" or "/") is disallowed, see DeleteRootDirContents.
+ virtual Status DeleteDirContents(const std::string& path) = 0;
+
+ /// EXPERIMENTAL: Delete the root directory's contents, recursively.
+ ///
+ /// Implementations may decide to raise an error if this operation is
+ /// too dangerous.
+ // NOTE: may decide to remove this if it's deemed not useful
+ virtual Status DeleteRootDirContents() = 0;
+
+ /// Delete a file.
+ virtual Status DeleteFile(const std::string& path) = 0;
+ /// Delete many files.
+ ///
+ /// The default implementation issues individual delete operations in sequence.
+ virtual Status DeleteFiles(const std::vector<std::string>& paths);
+
+ /// Move / rename a file or directory.
+ ///
+ /// If the destination exists:
+ /// - if it is a non-empty directory, an error is returned
+ /// - otherwise, if it has the same type as the source, it is replaced
+ /// - otherwise, behavior is unspecified (implementation-dependent).
+ virtual Status Move(const std::string& src, const std::string& dest) = 0;
+
+ /// Copy a file.
+ ///
+ /// If the destination exists and is a directory, an error is returned.
+ /// Otherwise, it is replaced.
+ virtual Status CopyFile(const std::string& src, const std::string& dest) = 0;
+
+ /// Open an input stream for sequential reading.
+ virtual Result<std::shared_ptr<io::InputStream>> OpenInputStream(
+ const std::string& path) = 0;
+ /// Open an input stream for sequential reading.
+ ///
+ /// This overload assumes the given FileInfo validly represents the file's
+ /// characteristics, and may optimize access depending on them (for example
+ /// avoid querying the file size or its existence).
+ virtual Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info);
+
+ /// Open an input file for random access reading.
+ virtual Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+ const std::string& path) = 0;
+ /// Open an input file for random access reading.
+ ///
+ /// This overload assumes the given FileInfo validly represents the file's
+ /// characteristics, and may optimize access depending on them (for example
+ /// avoid querying the file size or its existence).
+ virtual Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+ const FileInfo& info);
+
+ /// EXPERIMENTAL: async version of OpenInputStream
+ virtual Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
+ const std::string& path);
+ /// EXPERIMENTAL: async version of OpenInputStream
+ virtual Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
+ const FileInfo& info);
+
+ /// EXPERIMENTAL: async version of OpenInputFile
+ virtual Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
+ const std::string& path);
+ /// EXPERIMENTAL: async version of OpenInputFile
+ virtual Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
+ const FileInfo& info);
+
+ /// Open an output stream for sequential writing.
+ ///
+ /// If the target already exists, existing data is truncated.
+ virtual Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
+ const std::string& path,
+ const std::shared_ptr<const KeyValueMetadata>& metadata) = 0;
+ Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(const std::string& path);  // non-virtual overload without metadata
+
+ /// Open an output stream for appending.
+ ///
+ /// If the target doesn't exist, a new empty file is created.
+ virtual Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
+ const std::string& path,
+ const std::shared_ptr<const KeyValueMetadata>& metadata) = 0;
+ Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(const std::string& path);  // non-virtual overload without metadata
+
+ protected:
+ explicit FileSystem(const io::IOContext& io_context = io::default_io_context())
+ : io_context_(io_context) {}
+
+ io::IOContext io_context_;  // stored by value; exposed through io_context()
+ // Whether metadata operations (such as GetFileInfo or OpenInputStream)
+ // are cheap enough that the default async variants don't bother with
+ // a thread pool.
+ bool default_async_is_sync_ = true;
+};
+
+/// \brief A FileSystem implementation that delegates to another
+/// implementation after prepending a fixed base path.
+///
+/// This is useful to expose a logical view of a subtree of a filesystem,
+/// for example a directory in a LocalFileSystem.
+/// This works on abstract paths, i.e. paths using forward slashes and
+/// a single root "/". Windows paths are not guaranteed to work.
+/// This makes no security guarantee. For example, symlinks may allow to
+/// "escape" the subtree and access other parts of the underlying filesystem.
+class ARROW_EXPORT SubTreeFileSystem : public FileSystem {
+ public:
+ // This constructor may abort if base_path is invalid.
+ explicit SubTreeFileSystem(const std::string& base_path,
+ std::shared_ptr<FileSystem> base_fs);
+ ~SubTreeFileSystem() override;
+
+ std::string type_name() const override { return "subtree"; }
+ std::string base_path() const { return base_path_; }  // returns a copy of the stored base path
+ std::shared_ptr<FileSystem> base_fs() const { return base_fs_; }  // the filesystem being delegated to
+
+ Result<std::string> NormalizePath(std::string path) override;
+
+ bool Equals(const FileSystem& other) const override;
+
+ /// \cond FALSE
+ using FileSystem::GetFileInfo;
+ /// \endcond
+ Result<FileInfo> GetFileInfo(const std::string& path) override;
+ Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;
+
+ FileInfoGenerator GetFileInfoGenerator(const FileSelector& select) override;
+
+ Status CreateDir(const std::string& path, bool recursive = true) override;
+
+ Status DeleteDir(const std::string& path) override;
+ Status DeleteDirContents(const std::string& path) override;
+ Status DeleteRootDirContents() override;
+
+ Status DeleteFile(const std::string& path) override;
+
+ Status Move(const std::string& src, const std::string& dest) override;
+
+ Status CopyFile(const std::string& src, const std::string& dest) override;
+
+ Result<std::shared_ptr<io::InputStream>> OpenInputStream(
+ const std::string& path) override;
+ Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info) override;
+ Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+ const std::string& path) override;
+ Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+ const FileInfo& info) override;
+
+ Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
+ const std::string& path) override;
+ Future<std::shared_ptr<io::InputStream>> OpenInputStreamAsync(
+ const FileInfo& info) override;
+ Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
+ const std::string& path) override;
+ Future<std::shared_ptr<io::RandomAccessFile>> OpenInputFileAsync(
+ const FileInfo& info) override;
+
+ Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
+ const std::string& path,
+ const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+ Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
+ const std::string& path,
+ const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+
+ protected:
+ SubTreeFileSystem() {}
+
+ const std::string base_path_;  // const member: fixed for the lifetime of the object
+ std::shared_ptr<FileSystem> base_fs_;
+
+ std::string PrependBase(const std::string& s) const;
+ Status PrependBaseNonEmpty(std::string* s) const;
+ Result<std::string> StripBase(const std::string& s) const;
+ Status FixInfo(FileInfo* info) const;
+
+ static Result<std::string> NormalizeBasePath(
+ std::string base_path, const std::shared_ptr<FileSystem>& base_fs);
+};
+
+/// \brief A FileSystem implementation that delegates to another
+/// implementation but inserts latencies at various points.
+class ARROW_EXPORT SlowFileSystem : public FileSystem {
+ public:
+ SlowFileSystem(std::shared_ptr<FileSystem> base_fs,
+ std::shared_ptr<io::LatencyGenerator> latencies);
+ SlowFileSystem(std::shared_ptr<FileSystem> base_fs, double average_latency);  // NOTE(review): unit of average_latency is not stated here; presumably seconds, confirm
+ SlowFileSystem(std::shared_ptr<FileSystem> base_fs, double average_latency,
+ int32_t seed);
+
+ std::string type_name() const override { return "slow"; }
+ bool Equals(const FileSystem& other) const override;
+
+ using FileSystem::GetFileInfo;  // keep the base-class GetFileInfo overloads visible next to the overrides below
+ Result<FileInfo> GetFileInfo(const std::string& path) override;
+ Result<FileInfoVector> GetFileInfo(const FileSelector& select) override;
+
+ Status CreateDir(const std::string& path, bool recursive = true) override;
+
+ Status DeleteDir(const std::string& path) override;
+ Status DeleteDirContents(const std::string& path) override;
+ Status DeleteRootDirContents() override;
+
+ Status DeleteFile(const std::string& path) override;
+
+ Status Move(const std::string& src, const std::string& dest) override;
+
+ Status CopyFile(const std::string& src, const std::string& dest) override;
+
+ Result<std::shared_ptr<io::InputStream>> OpenInputStream(
+ const std::string& path) override;
+ Result<std::shared_ptr<io::InputStream>> OpenInputStream(const FileInfo& info) override;
+ Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+ const std::string& path) override;
+ Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+ const FileInfo& info) override;
+ Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
+ const std::string& path,
+ const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+ Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
+ const std::string& path,
+ const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+
+ protected:
+ std::shared_ptr<FileSystem> base_fs_;  // the filesystem being delegated to
+ std::shared_ptr<io::LatencyGenerator> latencies_;  // source of the inserted latencies
+};
+
+/// \defgroup filesystem-factories Functions for creating FileSystem instances
+///
+/// @{
+
+/// \brief Create a new FileSystem by URI
+///
+/// Recognized schemes are "file", "mock", "hdfs" and "s3fs".
+///
+/// \param[in] uri a URI-based path, ex: file:///some/local/path
+/// \param[out] out_path (optional) Path inside the filesystem.
+/// \return the FileSystem instance, or an error.
+ARROW_EXPORT
+Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
+ std::string* out_path = NULLPTR);
+
+/// \brief Create a new FileSystem by URI with a custom IO context
+///
+/// Recognized schemes are "file", "mock", "hdfs" and "s3fs".
+///
+/// \param[in] uri a URI-based path, ex: file:///some/local/path
+/// \param[in] io_context an IOContext which will be associated with the filesystem
+/// \param[out] out_path (optional) Path inside the filesystem.
+/// \return the FileSystem instance, or an error.
+ARROW_EXPORT
+Result<std::shared_ptr<FileSystem>> FileSystemFromUri(const std::string& uri,
+ const io::IOContext& io_context,
+ std::string* out_path = NULLPTR);
+
+/// \brief Create a new FileSystem by URI or absolute local path
+///
+/// Same as FileSystemFromUri, but in addition also recognize non-URIs
+/// and treat them as local filesystem paths. Only absolute local filesystem
+/// paths are allowed.
+ARROW_EXPORT
+Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
+ const std::string& uri, std::string* out_path = NULLPTR);
+
+/// \brief Create a new FileSystem by URI or absolute local path with a custom IO context
+///
+/// Same as FileSystemFromUri, but in addition also recognize non-URIs
+/// and treat them as local filesystem paths. Only absolute local filesystem
+/// paths are allowed.
+ARROW_EXPORT
+Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(
+ const std::string& uri, const io::IOContext& io_context,
+ std::string* out_path = NULLPTR);
+
+/// @}
+
+/// \brief Copy files, including from one FileSystem to another
+///
+/// If a source and destination are resident in the same FileSystem, FileSystem::CopyFile
+/// will be used; otherwise the file will be opened as a stream in both FileSystems and
+/// chunks copied from the source to the destination. No directories will be created.
+ARROW_EXPORT
+Status CopyFiles(const std::vector<FileLocator>& sources,
+ const std::vector<FileLocator>& destinations,
+ const io::IOContext& io_context = io::default_io_context(),
+ int64_t chunk_size = 1024 * 1024, bool use_threads = true);  // default chunk_size: 1024 * 1024
+
+/// \brief Copy selected files, including from one FileSystem to another
+///
+/// Directories will be created under the destination base directory as needed.
+ARROW_EXPORT
+Status CopyFiles(const std::shared_ptr<FileSystem>& source_fs,
+ const FileSelector& source_sel,
+ const std::shared_ptr<FileSystem>& destination_fs,
+ const std::string& destination_base_dir,
+ const io::IOContext& io_context = io::default_io_context(),
+ int64_t chunk_size = 1024 * 1024, bool use_threads = true);  // default chunk_size: 1024 * 1024
+
+struct FileSystemGlobalOptions {  // options passed to fs::Initialize()
+ /// Path to a single PEM file holding all TLS CA certificates
+ ///
+ /// If empty, the underlying TLS library's defaults will be used.
+ std::string tls_ca_file_path;
+
+ /// Path to a directory holding TLS CA certificates in individual PEM files
+ /// named according to the OpenSSL "hashed" format.
+ ///
+ /// If empty, the underlying TLS library's defaults will be used.
+ std::string tls_ca_dir_path;
+};
+
+/// EXPERIMENTAL: optional global initialization routine
+///
+/// This is for environments (such as manylinux) where the path
+/// to TLS CA certificates needs to be configured at runtime.
+ARROW_EXPORT
+Status Initialize(const FileSystemGlobalOptions& options);
+
+} // namespace fs
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/localfs.cc b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/localfs.cc
new file mode 100644
index 0000000000..775fd746aa
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/localfs.cc
@@ -0,0 +1,448 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <chrono>
+#include <cstring>
+#include <sstream>
+#include <utility>
+
+#ifdef _WIN32
+#include "arrow/util/windows_compatibility.h"
+#else
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#endif
+
+#include "arrow/filesystem/localfs.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/file.h"
+#include "arrow/util/io_util.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/uri.h"
+#include "arrow/util/windows_fixup.h"
+
+namespace arrow {
+namespace fs {
+
+using ::arrow::internal::IOErrorFromErrno;
+#ifdef _WIN32
+using ::arrow::internal::IOErrorFromWinError;
+#endif
+using ::arrow::internal::NativePathString;
+using ::arrow::internal::PlatformFilename;
+
+namespace internal {
+
+#ifdef _WIN32
+/// Whether `c` is an ASCII letter (A-Z or a-z), i.e. usable as a drive letter.
+static bool IsDriveLetter(char c) {
+  // Can't use locale-dependent functions from the C/C++ stdlib
+  const bool is_upper = ('A' <= c && c <= 'Z');
+  const bool is_lower = ('a' <= c && c <= 'z');
+  return is_upper || is_lower;
+}
+#endif
+
+/// Whether `s` looks like an absolute local filesystem path.
+///
+/// A string starting with '/' always qualifies. On Windows builds a leading
+/// '\' qualifies too, as does a drive letter followed by ":/" or ":\"
+/// (e.g. "C:\...").
+bool DetectAbsolutePath(const std::string& s) {
+  if (s.empty()) {
+    return false;
+  }
+  const char first = s[0];
+  // Is it a /-prefixed local path?
+  if (first == '/') {
+    return true;
+  }
+#ifdef _WIN32
+  // Is it a \-prefixed local path?
+  if (first == '\\') {
+    return true;
+  }
+  // Does it start with a drive letter in addition to being /- or \-prefixed,
+  // e.g. "C:\..."?
+  const bool has_drive_prefix = s.length() >= 3 && s[1] == ':' &&
+                                (s[2] == '/' || s[2] == '\\') && IsDriveLetter(first);
+  if (has_drive_prefix) {
+    return true;
+  }
+#endif
+  return false;
+}
+
+} // namespace internal
+
+namespace {
+
+#ifdef _WIN32
+
+/// Convert a native path string to std::string by way of PlatformFilename.
+std::string NativeToString(const NativePathString& ns) {
+  return PlatformFilename(ns).ToString();
+}
+
+/// Convert a Windows FILETIME (100 ns ticks since January 1, 1601 UTC)
+/// to a TimePoint.
+TimePoint ToTimePoint(FILETIME ft) {
+  // Hundreds of nanoseconds between January 1, 1601 (UTC) and the Unix epoch.
+  static constexpr int64_t kFileTimeEpoch = 11644473600LL * 10000000;
+
+  const int64_t high_part = static_cast<int64_t>(ft.dwHighDateTime) << 32;
+  // hundreds of ns since Unix epoch
+  const int64_t ticks_since_unix_epoch = high_part + ft.dwLowDateTime - kFileTimeEpoch;
+  const std::chrono::nanoseconds elapsed(100 * ticks_since_unix_epoch);
+  return TimePoint(std::chrono::duration_cast<TimePoint::duration>(elapsed));
+}
+
+/// Build a FileInfo (type, size, mtime; no path) from Windows file information.
+///
+/// Entries carrying FILE_ATTRIBUTE_DIRECTORY become directories with no size;
+/// everything else is reported as a regular file with its 64-bit size.
+FileInfo FileInformationToFileInfo(const BY_HANDLE_FILE_INFORMATION& information) {
+  FileInfo result;
+  const bool is_directory =
+      (information.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
+  if (!is_directory) {
+    // Regular file
+    const int64_t size_high = static_cast<int64_t>(information.nFileSizeHigh) << 32;
+    result.set_type(FileType::File);
+    result.set_size(size_high + information.nFileSizeLow);
+  } else {
+    result.set_type(FileType::Directory);
+    result.set_size(kNoSize);
+  }
+  result.set_mtime(ToTimePoint(information.ftLastWriteTime));
+  return result;
+}
+
+/// Query type, size and mtime of `path` (Windows implementation).
+///
+/// \param[in] path native (wide-character) path to inspect
+/// \return a FileInfo whose path is the narrow-string form of `path`.
+///         A missing file or missing parent path yields FileType::NotFound
+///         with kNoSize / kNoTime; any other failure is an IOError.
+Result<FileInfo> StatFile(const std::wstring& path) {
+  const std::string bytes_path = NativeToString(path);
+
+  /* Inspired by CPython, see Modules/posixmodule.c */
+  HANDLE h = CreateFileW(path.c_str(), FILE_READ_ATTRIBUTES, /* desired access */
+                         0,                                  /* share mode */
+                         NULL,                               /* security attributes */
+                         OPEN_EXISTING,
+                         /* FILE_FLAG_BACKUP_SEMANTICS is required to open a directory */
+                         FILE_ATTRIBUTE_NORMAL | FILE_FLAG_BACKUP_SEMANTICS, NULL);
+
+  if (h == INVALID_HANDLE_VALUE) {
+    // Read the error code once, right after the failing call, and reuse it.
+    const DWORD err = GetLastError();
+    if (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) {
+      FileInfo info;
+      info.set_path(bytes_path);
+      info.set_type(FileType::NotFound);
+      info.set_mtime(kNoTime);
+      info.set_size(kNoSize);
+      return info;
+    }
+    return IOErrorFromWinError(err, "Failed querying information for path '",
+                               bytes_path, "'");
+  }
+
+  BY_HANDLE_FILE_INFORMATION information;
+  if (!GetFileInformationByHandle(h, &information)) {
+    // Capture the error before CloseHandle() gets a chance to overwrite it.
+    const DWORD err = GetLastError();
+    CloseHandle(h);
+    return IOErrorFromWinError(err, "Failed querying information for path '",
+                               bytes_path, "'");
+  }
+  CloseHandle(h);
+
+  FileInfo info = FileInformationToFileInfo(information);
+  info.set_path(bytes_path);
+  return info;
+}
+
+#else // POSIX systems
+
+/// Convert a POSIX timespec (seconds + nanoseconds) to a TimePoint.
+TimePoint ToTimePoint(const struct timespec& s) {
+  const int64_t whole_seconds_ns = static_cast<int64_t>(s.tv_sec) * 1000000000;
+  const int64_t fraction_ns = static_cast<int64_t>(s.tv_nsec);
+  const std::chrono::nanoseconds elapsed(whole_seconds_ns + fraction_ns);
+  return TimePoint(std::chrono::duration_cast<TimePoint::duration>(elapsed));
+}
+
+/// Build a FileInfo (type, size, mtime; no path) from a POSIX `struct stat`.
+///
+/// Regular files get their st_size; directories and every other kind of
+/// entry get kNoSize, the latter being reported as FileType::Unknown.
+FileInfo StatToFileInfo(const struct stat& s) {
+  FileType type = FileType::Unknown;
+  int64_t size = kNoSize;
+  if (S_ISREG(s.st_mode)) {
+    type = FileType::File;
+    size = static_cast<int64_t>(s.st_size);
+  } else if (S_ISDIR(s.st_mode)) {
+    type = FileType::Directory;
+  }
+
+  FileInfo result;
+  result.set_type(type);
+  result.set_size(size);
+#ifdef __APPLE__
+  // macOS doesn't use the POSIX-compliant spelling
+  result.set_mtime(ToTimePoint(s.st_mtimespec));
+#else
+  result.set_mtime(ToTimePoint(s.st_mtim));
+#endif
+  return result;
+}
+
+/// Query type, size and mtime of `path` with stat() (POSIX implementation).
+///
+/// ENOENT, ENOTDIR and ELOOP are reported as a FileType::NotFound entry with
+/// kNoSize / kNoTime; any other stat() failure is returned as an IOError.
+Result<FileInfo> StatFile(const std::string& path) {
+  struct stat st;
+  if (stat(path.c_str(), &st) != -1) {
+    FileInfo found = StatToFileInfo(st);
+    found.set_path(path);
+    return found;
+  }
+
+  const int err = errno;
+  const bool is_missing = (err == ENOENT || err == ENOTDIR || err == ELOOP);
+  if (!is_missing) {
+    return IOErrorFromErrno(err, "Failed stat()ing path '", path, "'");
+  }
+
+  FileInfo missing;
+  missing.set_type(FileType::NotFound);
+  missing.set_mtime(kNoTime);
+  missing.set_size(kNoSize);
+  missing.set_path(path);
+  return missing;
+}
+
+#endif
+
+/// Append to `out` the FileInfo of every entry found under `dir_fn`.
+///
+/// \param[in] dir_fn directory to list
+/// \param[in] select selector supplying allow_not_found, recursive and
+///            max_recursion
+/// \param[in] nesting_depth recursion depth of `dir_fn` (0 for the base dir)
+/// \param[out] out vector receiving the entries; entries that stat as
+///             NotFound are skipped
+/// \return the listing error, unless `select.allow_not_found` is set, the
+///         error is an IOError and the directory does not exist, in which
+///         case OK is returned with nothing appended
+Status StatSelector(const PlatformFilename& dir_fn, const FileSelector& select,
+                    int32_t nesting_depth, std::vector<FileInfo>* out) {
+  auto result = ListDir(dir_fn);
+  if (!result.ok()) {
+    auto status = result.status();
+    if (select.allow_not_found && status.IsIOError()) {
+      ARROW_ASSIGN_OR_RAISE(bool exists, FileExists(dir_fn));
+      if (!exists) {
+        return Status::OK();
+      }
+    }
+    return status;
+  }
+
+  for (const auto& child_fn : *result) {
+    PlatformFilename full_fn = dir_fn.Join(child_fn);
+    ARROW_ASSIGN_OR_RAISE(FileInfo info, StatFile(full_fn.ToNative()));
+    // Remember the type before `info` is moved into `out`, so that the
+    // recursion test below never reads a moved-from object.
+    const FileType type = info.type();
+    if (type != FileType::NotFound) {
+      out->push_back(std::move(info));
+    }
+    if (nesting_depth < select.max_recursion && select.recursive &&
+        type == FileType::Directory) {
+      RETURN_NOT_OK(StatSelector(full_fn, select, nesting_depth + 1, out));
+    }
+  }
+  return Status::OK();
+}
+
+} // namespace
+
+/// Return a default-constructed options object.
+LocalFileSystemOptions LocalFileSystemOptions::Defaults() {
+  LocalFileSystemOptions defaults;
+  return defaults;
+}
+
+/// Two option sets are equal when their `use_mmap` flags match.
+bool LocalFileSystemOptions::Equals(const LocalFileSystemOptions& other) const {
+  const bool same_mmap_setting = (use_mmap == other.use_mmap);
+  return same_mmap_setting;
+}
+
+/// Derive local filesystem options and the local path from a parsed URI.
+///
+/// \param[in] uri the parsed URI; a username or password is rejected, and a
+///            hostname is only accepted on Windows builds, where it yields a
+///            "//host/path" style path
+/// \param[out] out_path receives the local path; may be null, in which case
+///             the path is not reported
+/// \return default options (the use_mmap option is not read from the URI
+///         yet), or Status::Invalid for an unsupported URI
+Result<LocalFileSystemOptions> LocalFileSystemOptions::FromUri(
+    const ::arrow::internal::Uri& uri, std::string* out_path) {
+  if (!uri.username().empty() || !uri.password().empty()) {
+    return Status::Invalid("Unsupported username or password in local URI: '",
+                           uri.ToString(), "'");
+  }
+  std::string path;
+  const auto host = uri.host();
+  if (!host.empty()) {
+#ifdef _WIN32
+    std::stringstream ss;
+    ss << "//" << host << "/" << internal::RemoveLeadingSlash(uri.path());
+    path = ss.str();
+#else
+    return Status::Invalid("Unsupported hostname in non-Windows local URI: '",
+                           uri.ToString(), "'");
+#endif
+  } else {
+    path = uri.path();
+  }
+  // Build the path locally first, so that a null `out_path` is tolerated
+  // instead of being dereferenced.
+  if (out_path != nullptr) {
+    *out_path = std::move(path);
+  }
+
+  // TODO handle use_mmap option
+  return LocalFileSystemOptions();
+}
+
+/// Construct with default options and the given IO context.
+LocalFileSystem::LocalFileSystem(const io::IOContext& io_context)
+    : FileSystem(io_context), options_(LocalFileSystemOptions::Defaults()) {}
+
+/// Construct with explicit options and the given IO context.
+LocalFileSystem::LocalFileSystem(const LocalFileSystemOptions& options,
+                                 const io::IOContext& io_context)
+    : FileSystem(io_context), options_(options) {}
+
+// Nothing to release explicitly; members clean up after themselves.
+LocalFileSystem::~LocalFileSystem() = default;
+
+/// Normalize `path` by round-tripping it through PlatformFilename.
+///
+/// A path that PlatformFilename::FromString rejects is reported as an error.
+Result<std::string> LocalFileSystem::NormalizePath(std::string path) {
+  ARROW_ASSIGN_OR_RAISE(auto platform_fn, PlatformFilename::FromString(path));
+  return platform_fn.ToString();
+}
+
+/// Equal when `other` has the same type name and equal options.
+bool LocalFileSystem::Equals(const FileSystem& other) const {
+  if (other.type_name() == type_name()) {
+    // Same type name, so the downcast is expected to be valid.
+    const auto& other_local =
+        ::arrow::internal::checked_cast<const LocalFileSystem&>(other);
+    return options_.Equals(other_local.options());
+  }
+  return false;
+}
+
+/// Stat a single path after converting it to its native form.
+Result<FileInfo> LocalFileSystem::GetFileInfo(const std::string& path) {
+  ARROW_ASSIGN_OR_RAISE(auto platform_fn, PlatformFilename::FromString(path));
+  const auto& native_path = platform_fn.ToNative();
+  return StatFile(native_path);
+}
+
+/// Collect the entries selected by `select`, starting at nesting depth 0
+/// in `select.base_dir`.
+Result<std::vector<FileInfo>> LocalFileSystem::GetFileInfo(const FileSelector& select) {
+  ARROW_ASSIGN_OR_RAISE(auto base_dir_fn, PlatformFilename::FromString(select.base_dir));
+  std::vector<FileInfo> infos;
+  RETURN_NOT_OK(StatSelector(base_dir_fn, select, /*nesting_depth=*/0, &infos));
+  return infos;
+}
+
+/// Create `path`; with `recursive` set, the whole directory tree is created.
+Status LocalFileSystem::CreateDir(const std::string& path, bool recursive) {
+  ARROW_ASSIGN_OR_RAISE(auto dir_fn, PlatformFilename::FromString(path));
+  if (!recursive) {
+    return ::arrow::internal::CreateDir(dir_fn).status();
+  }
+  return ::arrow::internal::CreateDirTree(dir_fn).status();
+}
+
+Status LocalFileSystem::DeleteDir(const std::string& path) {
+ ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
+ auto st = ::arrow::internal::DeleteDirTree(fn, /*allow_not_found=*/false).status();
+ if (!st.ok()) {
+ // TODO Status::WithPrefix()?
+ std::stringstream ss;
+ ss << "Cannot delete directory '" << path << "': " << st.message();
+ return st.WithMessage(ss.str());
+ }
+ return Status::OK();
+}
+
+/// Delete everything inside `path` while keeping the directory itself.
+///
+/// An empty path is rejected up front; a missing directory is an error.
+/// Failures keep their status code but get the path prepended to the message.
+Status LocalFileSystem::DeleteDirContents(const std::string& path) {
+  if (internal::IsEmptyPath(path)) {
+    return internal::InvalidDeleteDirContents(path);
+  }
+  ARROW_ASSIGN_OR_RAISE(auto dir_fn, PlatformFilename::FromString(path));
+  Status st =
+      ::arrow::internal::DeleteDirContents(dir_fn, /*allow_not_found=*/false).status();
+  if (st.ok()) {
+    return Status::OK();
+  }
+  std::stringstream message;
+  message << "Cannot delete directory contents in '" << path << "': " << st.message();
+  return st.WithMessage(message.str());
+}
+
+/// Always refuses: wiping the local root directory is never allowed.
+Status LocalFileSystem::DeleteRootDirContents() {
+  Status refusal =
+      Status::Invalid("LocalFileSystem::DeleteRootDirContents is strictly forbidden");
+  return refusal;
+}
+
+Status LocalFileSystem::DeleteFile(const std::string& path) {
+ ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
+ return ::arrow::internal::DeleteFile(fn, /*allow_not_found=*/false).status();
+}
+
+/// Rename `src` to `dest` using the platform's native rename primitive
+/// (MoveFileExW with MOVEFILE_REPLACE_EXISTING on Windows, rename()
+/// elsewhere).
+Status LocalFileSystem::Move(const std::string& src, const std::string& dest) {
+  ARROW_ASSIGN_OR_RAISE(auto source_fn, PlatformFilename::FromString(src));
+  ARROW_ASSIGN_OR_RAISE(auto target_fn, PlatformFilename::FromString(dest));
+
+#ifdef _WIN32
+  const BOOL moved = MoveFileExW(source_fn.ToNative().c_str(),
+                                 target_fn.ToNative().c_str(), MOVEFILE_REPLACE_EXISTING);
+  if (!moved) {
+    return IOErrorFromWinError(GetLastError(), "Failed renaming '", source_fn.ToString(),
+                               "' to '", target_fn.ToString(), "'");
+  }
+#else
+  const int rc = rename(source_fn.ToNative().c_str(), target_fn.ToNative().c_str());
+  if (rc == -1) {
+    return IOErrorFromErrno(errno, "Failed renaming '", source_fn.ToString(), "' to '",
+                            target_fn.ToString(), "'");
+  }
+#endif
+  return Status::OK();
+}
+
+/// Copy the file `src` to `dest`.
+///
+/// Copying a path onto itself (same native path string) is a no-op. Windows
+/// builds use CopyFileW; other platforms stream the contents in 1 MiB chunks
+/// and then close both streams.
+Status LocalFileSystem::CopyFile(const std::string& src, const std::string& dest) {
+  ARROW_ASSIGN_OR_RAISE(auto source_fn, PlatformFilename::FromString(src));
+  ARROW_ASSIGN_OR_RAISE(auto target_fn, PlatformFilename::FromString(dest));
+  // XXX should we use fstat() to compare inodes?
+  const bool same_native_path = (source_fn.ToNative() == target_fn.ToNative());
+  if (same_native_path) {
+    return Status::OK();
+  }
+
+#ifdef _WIN32
+  const BOOL copied = CopyFileW(source_fn.ToNative().c_str(), target_fn.ToNative().c_str(),
+                                FALSE /* bFailIfExists */);
+  if (!copied) {
+    return IOErrorFromWinError(GetLastError(), "Failed copying '", source_fn.ToString(),
+                               "' to '", target_fn.ToString(), "'");
+  }
+  return Status::OK();
+#else
+  ARROW_ASSIGN_OR_RAISE(auto input, OpenInputStream(src));
+  ARROW_ASSIGN_OR_RAISE(auto output, OpenOutputStream(dest));
+  RETURN_NOT_OK(
+      internal::CopyStream(input, output, 1024 * 1024 /* chunk_size */, io_context()));
+  RETURN_NOT_OK(output->Close());
+  return input->Close();
+#endif
+}
+
+namespace {
+
+/// Open `path` for reading as the requested stream type.
+///
+/// With `options.use_mmap` the file is memory-mapped read-only; otherwise a
+/// regular readable file is opened using the IO context's memory pool.
+template <typename InputStreamType>
+Result<std::shared_ptr<InputStreamType>> OpenInputStreamGeneric(
+    const std::string& path, const LocalFileSystemOptions& options,
+    const io::IOContext& io_context) {
+  if (!options.use_mmap) {
+    return io::ReadableFile::Open(path, io_context.pool());
+  }
+  return io::MemoryMappedFile::Open(path, io::FileMode::READ);
+}
+
+} // namespace
+
+/// Open `path` for sequential reading, honouring this filesystem's options
+/// and IO context.
+Result<std::shared_ptr<io::InputStream>> LocalFileSystem::OpenInputStream(
+    const std::string& path) {
+  const io::IOContext& context = io_context();
+  return OpenInputStreamGeneric<io::InputStream>(path, options_, context);
+}
+
+/// Open `path` for random-access reading, honouring this filesystem's
+/// options and IO context.
+Result<std::shared_ptr<io::RandomAccessFile>> LocalFileSystem::OpenInputFile(
+    const std::string& path) {
+  const io::IOContext& context = io_context();
+  return OpenInputStreamGeneric<io::RandomAccessFile>(path, options_, context);
+}
+
+namespace {
+
+/// Open `path` write-only and wrap the descriptor in a FileOutputStream.
+///
+/// `truncate` and `append` are forwarded to FileOpenWritable. If wrapping the
+/// descriptor fails, it is closed (best effort) before the error is returned.
+Result<std::shared_ptr<io::OutputStream>> OpenOutputStreamGeneric(const std::string& path,
+                                                                  bool truncate,
+                                                                  bool append) {
+  ARROW_ASSIGN_OR_RAISE(auto file_fn, PlatformFilename::FromString(path));
+  ARROW_ASSIGN_OR_RAISE(int fd,
+                        ::arrow::internal::FileOpenWritable(
+                            file_fn, /*write_only=*/true, truncate, append));
+  auto maybe_stream = io::FileOutputStream::Open(fd);
+  if (!maybe_stream.ok()) {
+    // The stream never took ownership of the descriptor: close it ourselves.
+    ARROW_UNUSED(::arrow::internal::FileClose(fd));
+  }
+  return maybe_stream;
+}
+
+} // namespace
+
+/// Open `path` for writing, truncating any existing content.
+///
+/// `metadata` is accepted for interface compatibility but is not used here.
+Result<std::shared_ptr<io::OutputStream>> LocalFileSystem::OpenOutputStream(
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+  return OpenOutputStreamGeneric(path, /*truncate=*/true, /*append=*/false);
+}
+
+/// Open `path` for appending, keeping any existing content.
+///
+/// `metadata` is accepted for interface compatibility but is not used here.
+Result<std::shared_ptr<io::OutputStream>> LocalFileSystem::OpenAppendStream(
+    const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+  return OpenOutputStreamGeneric(path, /*truncate=*/false, /*append=*/true);
+}
+
+} // namespace fs
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/localfs.h b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/localfs.h
new file mode 100644
index 0000000000..f8e77aee59
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/localfs.h
@@ -0,0 +1,113 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/filesystem/filesystem.h"
+
+namespace arrow {
+namespace internal {
+
+class Uri;
+
+}
+
+namespace fs {
+
+/// Options for the LocalFileSystem implementation.
+struct ARROW_EXPORT LocalFileSystemOptions {
+ /// Whether OpenInputStream and OpenInputFile return a mmap'ed file,
+ /// or a regular one.
+ bool use_mmap = false;
+
+ /// \brief Initialize with defaults
+ static LocalFileSystemOptions Defaults();
+
+ /// \brief Compare these options with `other`
+ bool Equals(const LocalFileSystemOptions& other) const;
+
+ /// \brief Build options from a URI
+ ///
+ /// NOTE(review): presumably the local path extracted from `uri` is stored
+ /// in `*out_path`; confirm against the definition in localfs.cc.
+ static Result<LocalFileSystemOptions> FromUri(const ::arrow::internal::Uri& uri,
+ std::string* out_path);
+};
+
+/// \brief A FileSystem implementation accessing files on the local machine.
+///
+/// This class handles only `/`-separated paths. If desired, conversion
+/// from Windows backslash-separated paths should be done by the caller.
+/// Details such as symlinks are abstracted away (symlinks are always
+/// followed, except when deleting an entry).
+class ARROW_EXPORT LocalFileSystem : public FileSystem {
+ public:
+ /// Construct with an IO context only (no explicit options).
+ explicit LocalFileSystem(const io::IOContext& = io::default_io_context());
+ /// Construct with explicit options and an IO context.
+ explicit LocalFileSystem(const LocalFileSystemOptions&,
+ const io::IOContext& = io::default_io_context());
+ ~LocalFileSystem() override;
+
+ std::string type_name() const override { return "local"; }
+
+ Result<std::string> NormalizePath(std::string path) override;
+
+ bool Equals(const FileSystem& other) const override;
+
+ /// Return a copy of the options this filesystem was configured with.
+ LocalFileSystemOptions options() const { return options_; }
+
+ /// \cond FALSE
+ using FileSystem::GetFileInfo;
+ /// \endcond
+ Result<FileInfo> GetFileInfo(const std::string& path) override;
+ Result<std::vector<FileInfo>> GetFileInfo(const FileSelector& select) override;
+
+ Status CreateDir(const std::string& path, bool recursive = true) override;
+
+ Status DeleteDir(const std::string& path) override;
+ Status DeleteDirContents(const std::string& path) override;
+ Status DeleteRootDirContents() override;
+
+ Status DeleteFile(const std::string& path) override;
+
+ Status Move(const std::string& src, const std::string& dest) override;
+
+ Status CopyFile(const std::string& src, const std::string& dest) override;
+
+ /// Whether the returned stream is memory-mapped is controlled by
+ /// LocalFileSystemOptions::use_mmap.
+ Result<std::shared_ptr<io::InputStream>> OpenInputStream(
+ const std::string& path) override;
+ /// Whether the returned file is memory-mapped is controlled by
+ /// LocalFileSystemOptions::use_mmap.
+ Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+ const std::string& path) override;
+ Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
+ const std::string& path,
+ const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+ Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
+ const std::string& path,
+ const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+
+ protected:
+ /// Options used when opening input streams and files.
+ LocalFileSystemOptions options_;
+};
+
+namespace internal {
+
+// Return whether the string is detected as a local absolute path.
+// NOTE(review): the exact detection rules live in the definition, which is
+// not part of this header.
+ARROW_EXPORT
+bool DetectAbsolutePath(const std::string& s);
+
+} // namespace internal
+
+} // namespace fs
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/mockfs.cc b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/mockfs.cc
new file mode 100644
index 0000000000..14a38283b2
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/mockfs.cc
@@ -0,0 +1,780 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <algorithm>
+#include <iterator>
+#include <map>
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/filesystem/mockfs.h"
+#include "arrow/filesystem/path_util.h"
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/io/memory.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/future.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/variant.h"
+#include "arrow/util/windows_fixup.h"
+
+namespace arrow {
+namespace fs {
+namespace internal {
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////
+// Filesystem structure
+
+class Entry;
+
+// An in-memory regular file: name, modification time, optional metadata and
+// optional contents.
+struct File {
+  TimePoint mtime;
+  std::string name;
+  // Null until contents have been assigned (see MockFSOutputStream).
+  std::shared_ptr<Buffer> data;
+  std::shared_ptr<const KeyValueMetadata> metadata;
+
+  File(TimePoint mtime, std::string name) : mtime(mtime), name(std::move(name)) {}
+
+  // Size of the contents in bytes; a file without contents has size 0.
+  int64_t size() const {
+    if (!data) {
+      return 0;
+    }
+    return data->size();
+  }
+
+  // View of the contents; empty when the file has no contents.
+  explicit operator util::string_view() const {
+    return data ? util::string_view(*data) : util::string_view("");
+  }
+};
+
+// An in-memory directory: name, modification time and child entries keyed
+// (and therefore ordered) by name. Movable but not copyable.
+struct Directory {
+  std::string name;
+  TimePoint mtime;
+  std::map<std::string, std::unique_ptr<Entry>> entries;
+
+  Directory(std::string name, TimePoint mtime) : name(std::move(name)), mtime(mtime) {}
+
+  Directory(Directory&& other) noexcept
+      : name(std::move(other.name)),
+        mtime(other.mtime),
+        entries(std::move(other.entries)) {}
+
+  Directory& operator=(Directory&& other) noexcept {
+    name = std::move(other.name);
+    mtime = other.mtime;
+    entries = std::move(other.entries);
+    return *this;
+  }
+
+  // Look up the direct child named `s`; returns nullptr if there is none.
+  Entry* Find(const std::string& s) {
+    const auto found = entries.find(s);
+    return found == entries.end() ? nullptr : found->second.get();
+  }
+
+  // Insert `entry` under `s` unless a child with that name already exists.
+  // Returns whether the insertion took place.
+  bool CreateEntry(const std::string& s, std::unique_ptr<Entry> entry) {
+    DCHECK(!s.empty());
+    return entries.emplace(s, std::move(entry)).second;
+  }
+
+  // Insert `entry` under `s`, replacing any existing child with that name.
+  void AssignEntry(const std::string& s, std::unique_ptr<Entry> entry) {
+    DCHECK(!s.empty());
+    entries[s] = std::move(entry);
+  }
+
+  // Remove the child named `s`; returns whether a child was removed.
+  bool DeleteEntry(const std::string& s) { return entries.erase(s) != 0; }
+
+ private:
+  ARROW_DISALLOW_COPY_AND_ASSIGN(Directory);
+};
+
+// A filesystem entry: holds either a File or a Directory.
+using EntryBase = util::Variant<std::nullptr_t, File, Directory>;
+
+class Entry : public EntryBase {
+ public:
+  Entry(Entry&&) = default;
+  Entry& operator=(Entry&&) = default;
+  explicit Entry(Directory&& v) : EntryBase(std::move(v)) {}
+  explicit Entry(File&& v) : EntryBase(std::move(v)) {}
+
+  bool is_dir() const { return util::holds_alternative<Directory>(*this); }
+  bool is_file() const { return util::holds_alternative<File>(*this); }
+
+  Directory& as_dir() { return util::get<Directory>(*this); }
+  File& as_file() { return util::get<File>(*this); }
+
+  // Describe this entry: type and mtime, plus size for files.
+  // The path() property of the result is left unset.
+  FileInfo GetInfo() {
+    FileInfo info;
+    if (!is_dir()) {
+      DCHECK(is_file());
+      File& file = as_file();
+      info.set_type(FileType::File);
+      info.set_mtime(file.mtime);
+      info.set_size(file.size());
+      return info;
+    }
+    info.set_type(FileType::Directory);
+    info.set_mtime(as_dir().mtime);
+    return info;
+  }
+
+  // Describe this entry as above, and set its path to `base_path` joined
+  // with the entry's own name.
+  FileInfo GetInfo(const std::string& base_path) {
+    FileInfo info = GetInfo();
+    const std::string& own_name = is_dir() ? as_dir().name : as_file().name;
+    info.set_path(ConcatAbstractPath(base_path, own_name));
+    return info;
+  }
+
+  // Rename the entry (the key in the parent directory is not touched).
+  void SetName(const std::string& name) {
+    if (is_dir()) {
+      as_dir().name = name;
+      return;
+    }
+    DCHECK(is_file());
+    as_file().name = name;
+  }
+
+ private:
+  ARROW_DISALLOW_COPY_AND_ASSIGN(Entry);
+};
+
+////////////////////////////////////////////////////////////////////////////
+// Streams
+
+// Output stream that accumulates written bytes in a BufferBuilder and
+// publishes them to the target File when closed.
+class MockFSOutputStream : public io::OutputStream {
+ public:
+  MockFSOutputStream(File* file, MemoryPool* pool)
+      : file_(file), builder_(pool), closed_(false) {}
+
+  ~MockFSOutputStream() override = default;
+
+  // Implement the OutputStream interface
+
+  // Publish the accumulated bytes as the file's contents. Closing an
+  // already-closed stream is a no-op.
+  Status Close() override {
+    if (closed_) {
+      return Status::OK();
+    }
+    RETURN_NOT_OK(builder_.Finish(&file_->data));
+    closed_ = true;
+    return Status::OK();
+  }
+
+  // Discard the accumulated bytes and replace the file's contents with an
+  // explicit marker. MockFSOutputStream is mainly used for debugging and
+  // testing, so an aborted file should be recognizable as such.
+  Status Abort() override {
+    if (closed_) {
+      return Status::OK();
+    }
+    // NOTE(review): the count below is the file's current content size, not
+    // the number of bytes accumulated in `builder_`.
+    std::stringstream message;
+    message << "MockFSOutputStream aborted after " << file_->size() << " bytes written";
+    file_->data = Buffer::FromString(message.str());
+    closed_ = true;
+    return Status::OK();
+  }
+
+  bool closed() const override { return closed_; }
+
+  // Current write position, i.e. the number of bytes accumulated so far.
+  Result<int64_t> Tell() const override {
+    if (!closed_) {
+      return builder_.length();
+    }
+    return Status::Invalid("Invalid operation on closed stream");
+  }
+
+  Status Write(const void* data, int64_t nbytes) override {
+    if (!closed_) {
+      return builder_.Append(data, nbytes);
+    }
+    return Status::Invalid("Invalid operation on closed stream");
+  }
+
+ protected:
+  // Not owned. NOTE(review): nothing here keeps the File alive, so the
+  // stream presumably must not outlive the entry it writes to.
+  File* file_;
+  BufferBuilder builder_;
+  bool closed_;
+};
+
+// Input stream reading the contents a mock File had when the stream was
+// opened, and reporting the file's metadata.
+class MockFSInputStream : public io::BufferReader {
+ public:
+  // `file.data` is null for a file that was created but whose output stream
+  // has not been closed yet. File::size() and the string_view conversion
+  // treat that state as "empty"; do the same here by substituting an empty
+  // buffer, so the reader is never handed a null Buffer pointer.
+  explicit MockFSInputStream(const File& file)
+      : io::BufferReader(file.data ? file.data
+                                   : std::shared_ptr<Buffer>(Buffer::FromString(""))),
+        metadata_(file.metadata) {}
+
+  Result<std::shared_ptr<const KeyValueMetadata>> ReadMetadata() override {
+    return metadata_;
+  }
+
+ protected:
+  std::shared_ptr<const KeyValueMetadata> metadata_;
+};
+
+} // namespace
+
+// Print a directory as: '<full_path>' [mtime=<ticks since epoch>]
+std::ostream& operator<<(std::ostream& os, const MockDirInfo& di) {
+  const auto mtime_ticks = di.mtime.time_since_epoch().count();
+  os << "'" << di.full_path << "' [mtime=" << mtime_ticks << "]";
+  return os;
+}
+
+// Print a file as: '<full_path>' [mtime=<ticks since epoch>, size=<bytes>]
+std::ostream& operator<<(std::ostream& os, const MockFileInfo& di) {
+  const auto mtime_ticks = di.mtime.time_since_epoch().count();
+  os << "'" << di.full_path << "' [mtime=" << mtime_ticks;
+  os << ", size=" << di.data.length() << "]";
+  return os;
+}
+
+////////////////////////////////////////////////////////////////////////////
+// MockFileSystem implementation
+
+// Private state of MockFileSystem: the entry tree plus the mutex protecting
+// it. The methods below do not lock; callers are expected to hold the lock
+// obtained from lock_guard().
+class MockFileSystem::Impl {
+ public:
+  // Timestamp given to every entry that gets created or rewritten.
+  TimePoint current_time;
+  MemoryPool* pool;
+
+  // The root directory
+  Entry root;
+  std::mutex mutex;
+
+  Impl(TimePoint current_time, MemoryPool* pool)
+      : current_time(current_time), pool(pool), root(Directory("", current_time)) {}
+
+  // Acquire the filesystem mutex; it is released when the result is destroyed.
+  std::unique_lock<std::mutex> lock_guard() {
+    return std::unique_lock<std::mutex>(mutex);
+  }
+
+  Directory& RootDir() { return root.as_dir(); }
+
+  // Walk from the root along the path components [it, end). Stops at the
+  // first missing component or at the first file. Returns the deepest entry
+  // reached and stores the number of matched components in `*nconsumed`.
+  template <typename It>
+  Entry* FindEntry(It it, It end, size_t* nconsumed) {
+    Entry* current = &root;
+    size_t depth = 0;
+
+    while (it != end) {
+      DCHECK(current->is_dir());
+      Entry* next = current->as_dir().Find(*it);
+      if (next == nullptr) {
+        // Partial find only
+        break;
+      }
+      current = next;
+      ++depth;
+      if (current->is_file()) {
+        // Cannot go any further
+        break;
+      }
+      ++it;
+    }
+    *nconsumed = depth;
+    return current;
+  }
+
+  // Find an entry, allowing partial matching
+  Entry* FindEntry(const std::vector<std::string>& parts, size_t* nconsumed) {
+    return FindEntry(parts.begin(), parts.end(), nconsumed);
+  }
+
+  // Find an entry, only full matching allowed
+  Entry* FindEntry(const std::vector<std::string>& parts) {
+    size_t consumed;
+    Entry* found = FindEntry(parts, &consumed);
+    if (consumed != parts.size()) {
+      return nullptr;
+    }
+    return found;
+  }
+
+  // Find the parent entry, only full matching allowed.
+  // An empty path (the root) has no parent.
+  Entry* FindParent(const std::vector<std::string>& parts) {
+    if (parts.empty()) {
+      return nullptr;
+    }
+    size_t consumed;
+    Entry* found = FindEntry(parts.begin(), std::prev(parts.end()), &consumed);
+    return (consumed + 1 == parts.size()) ? found : nullptr;
+  }
+
+  // Append the info of every child of `base_dir` to `infos`, descending into
+  // subdirectories when the selector asks for recursion and the depth limit
+  // has not been reached.
+  void GatherInfos(const FileSelector& select, const std::string& base_path,
+                   const Directory& base_dir, int32_t nesting_depth,
+                   std::vector<FileInfo>* infos) {
+    for (const auto& name_and_entry : base_dir.entries) {
+      Entry* child = name_and_entry.second.get();
+      infos->push_back(child->GetInfo(base_path));
+      const bool descend =
+          select.recursive && nesting_depth < select.max_recursion && child->is_dir();
+      if (!descend) {
+        continue;
+      }
+      // Take a copy of the path: `infos` grows during the recursive call.
+      const std::string child_path = infos->back().path();
+      GatherInfos(select, child_path, child->as_dir(), nesting_depth + 1, infos);
+    }
+  }
+
+  // Append `dir` (unless it is the unnamed root) and all its subdirectories
+  // to `out`, depth-first in name order.
+  void DumpDirs(const std::string& prefix, const Directory& dir,
+                std::vector<MockDirInfo>* out) {
+    std::string path = prefix + dir.name;
+    if (!path.empty()) {
+      out->push_back({path, dir.mtime});
+      path += "/";
+    }
+    for (const auto& name_and_entry : dir.entries) {
+      Entry* child = name_and_entry.second.get();
+      if (!child->is_dir()) {
+        continue;
+      }
+      DumpDirs(path, child->as_dir(), out);
+    }
+  }
+
+  // Append every file below `dir` to `out`, depth-first in name order.
+  // The `data` of each result is a view into the file's buffer.
+  void DumpFiles(const std::string& prefix, const Directory& dir,
+                 std::vector<MockFileInfo>* out) {
+    std::string path = prefix + dir.name;
+    if (!path.empty()) {
+      path += "/";
+    }
+    for (const auto& name_and_entry : dir.entries) {
+      Entry* child = name_and_entry.second.get();
+      if (child->is_dir()) {
+        DumpFiles(path, child->as_dir(), out);
+      } else if (child->is_file()) {
+        auto& file = child->as_file();
+        out->push_back({path + file.name, file.mtime, util::string_view(file)});
+      }
+    }
+  }
+
+  // Open `path` for writing, creating the file if needed. The parent
+  // directory must already exist. With `append`, the existing contents are
+  // written to the new stream first.
+  Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
+      const std::string& path, bool append,
+      const std::shared_ptr<const KeyValueMetadata>& metadata) {
+    auto parts = SplitAbstractPath(path);
+    RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+    Entry* parent = FindParent(parts);
+    if (parent == nullptr || !parent->is_dir()) {
+      return PathNotFound(path);
+    }
+    Directory& parent_dir = parent->as_dir();
+
+    // Find the file in the parent dir, or create it
+    const auto& name = parts.back();
+    File* file = nullptr;
+    Entry* existing = parent_dir.Find(name);
+    if (existing == nullptr) {
+      std::unique_ptr<Entry> created(new Entry(File(current_time, name)));
+      // The Entry lives on the heap, so this pointer survives the move below.
+      file = &created->as_file();
+      parent_dir.AssignEntry(name, std::move(created));
+    } else if (existing->is_file()) {
+      file = &existing->as_file();
+      file->mtime = current_time;
+    } else {
+      return NotAFile(path);
+    }
+
+    file->metadata = metadata;
+    auto stream = std::make_shared<MockFSOutputStream>(file, pool);
+    if (append && file->data) {
+      RETURN_NOT_OK(stream->Write(file->data->data(), file->data->size()));
+    }
+    return stream;
+  }
+
+  // Open the existing file at `path` for reading.
+  Result<std::shared_ptr<io::BufferReader>> OpenInputReader(const std::string& path) {
+    auto parts = SplitAbstractPath(path);
+    RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+    Entry* entry = FindEntry(parts);
+    if (entry == nullptr) {
+      return PathNotFound(path);
+    }
+    if (entry->is_file()) {
+      return std::make_shared<MockFSInputStream>(entry->as_file());
+    }
+    return NotAFile(path);
+  }
+};
+
+// Defined here rather than in the header: the header only forward-declares
+// Impl, which is held through a std::unique_ptr.
+MockFileSystem::~MockFileSystem() = default;
+
+// Create an empty mock filesystem whose entries are stamped with
+// `current_time`. `io_context` supplies the memory pool for file contents
+// and is also forwarded to the FileSystem base, so that io_context() reports
+// the context the caller passed rather than the default one.
+MockFileSystem::MockFileSystem(TimePoint current_time, const io::IOContext& io_context)
+    : FileSystem(io_context), impl_(new Impl(current_time, io_context.pool())) {}
+
+// Identity comparison: a mock filesystem is only equal to itself.
+bool MockFileSystem::Equals(const FileSystem& other) const { return this == &other; }
+
+// Create the directory at `path`. Succeeds without changes if it already
+// exists. Without `recursive`, at most the last path component may be
+// missing. Fails if an existing ancestor is a file.
+Status MockFileSystem::CreateDir(const std::string& path, bool recursive) {
+  auto parts = SplitAbstractPath(path);
+  RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+  auto guard = impl_->lock_guard();
+
+  // Locate the deepest existing entry along the path.
+  size_t consumed;
+  Entry* entry = impl_->FindEntry(parts, &consumed);
+  if (!entry->is_dir()) {
+    auto file_path = JoinAbstractPath(parts.begin(), parts.begin() + consumed);
+    return Status::IOError("Cannot create directory '", path, "': ", "ancestor '",
+                           file_path, "' is not a directory");
+  }
+  const size_t missing = parts.size() - consumed;
+  if (!recursive && missing > 1) {
+    return Status::IOError("Cannot create directory '", path,
+                           "': ", "parent does not exist");
+  }
+
+  // Create each missing component below the previous one.
+  for (auto it = parts.begin() + consumed; it != parts.end(); ++it) {
+    std::unique_ptr<Entry> child(new Entry(Directory(*it, impl_->current_time)));
+    Entry* child_ptr = child.get();
+    bool inserted = entry->as_dir().CreateEntry(*it, std::move(child));
+    // No race condition on insertion is possible, as all operations are locked
+    DCHECK(inserted);
+    entry = child_ptr;
+  }
+  return Status::OK();
+}
+
+// Delete the directory at `path` together with everything below it.
+// Fails if the path does not exist or does not name a directory.
+Status MockFileSystem::DeleteDir(const std::string& path) {
+  auto parts = SplitAbstractPath(path);
+  RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+  auto guard = impl_->lock_guard();
+
+  Entry* parent = impl_->FindParent(parts);
+  if (parent == nullptr || !parent->is_dir()) {
+    return PathNotFound(path);
+  }
+  Directory& parent_dir = parent->as_dir();
+  const std::string& name = parts.back();
+
+  Entry* target = parent_dir.Find(name);
+  if (target == nullptr) {
+    return PathNotFound(path);
+  }
+  if (!target->is_dir()) {
+    return NotADir(path);
+  }
+
+  bool deleted = parent_dir.DeleteEntry(name);
+  DCHECK(deleted);
+  return Status::OK();
+}
+
+// Delete everything inside the directory at `path`, keeping the directory
+// itself. An empty path (the root) is rejected; DeleteRootDirContents() is
+// the method that clears the root.
+Status MockFileSystem::DeleteDirContents(const std::string& path) {
+  auto parts = SplitAbstractPath(path);
+  RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+  auto guard = impl_->lock_guard();
+
+  if (parts.empty()) {
+    // Refuse to wipe the whole filesystem through this method
+    return internal::InvalidDeleteDirContents(path);
+  }
+
+  Entry* target = impl_->FindEntry(parts);
+  if (target == nullptr) {
+    return PathNotFound(path);
+  }
+  if (target->is_dir()) {
+    target->as_dir().entries.clear();
+    return Status::OK();
+  }
+  return NotADir(path);
+}
+
+// Remove every entry from the root directory, i.e. empty the filesystem.
+Status MockFileSystem::DeleteRootDirContents() {
+ auto guard = impl_->lock_guard();
+
+ impl_->RootDir().entries.clear();
+ return Status::OK();
+}
+
+// Delete the file at `path`.
+// Fails if the path does not exist or does not name a file.
+Status MockFileSystem::DeleteFile(const std::string& path) {
+  auto parts = SplitAbstractPath(path);
+  RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+  auto guard = impl_->lock_guard();
+
+  Entry* parent = impl_->FindParent(parts);
+  if (parent == nullptr || !parent->is_dir()) {
+    return PathNotFound(path);
+  }
+  Directory& parent_dir = parent->as_dir();
+  const std::string& name = parts.back();
+
+  Entry* target = parent_dir.Find(name);
+  if (target == nullptr) {
+    return PathNotFound(path);
+  }
+  if (!target->is_file()) {
+    return NotAFile(path);
+  }
+
+  bool deleted = parent_dir.DeleteEntry(name);
+  DCHECK(deleted);
+  return Status::OK();
+}
+
+Result<FileInfo> MockFileSystem::GetFileInfo(const std::string& path) {
+ auto parts = SplitAbstractPath(path);
+ RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+ auto guard = impl_->lock_guard();
+
+ FileInfo info;
+ Entry* entry = impl_->FindEntry(parts);
+ if (entry == nullptr) {
+ info.set_type(FileType::NotFound);
+ } else {
+ info = entry->GetInfo();
+ }
+ info.set_path(path);
+ return info;
+}
+
+// List the entries below `selector.base_dir`, recursing as the selector
+// requests. A missing base directory yields an empty result when
+// `allow_not_found` is set and an error otherwise.
+Result<FileInfoVector> MockFileSystem::GetFileInfo(const FileSelector& selector) {
+  auto parts = SplitAbstractPath(selector.base_dir);
+  RETURN_NOT_OK(ValidateAbstractPathParts(parts));
+
+  auto guard = impl_->lock_guard();
+
+  FileInfoVector results;
+
+  Entry* base_dir = impl_->FindEntry(parts);
+  if (base_dir == nullptr) {
+    // Base directory does not exist
+    if (!selector.allow_not_found) {
+      return PathNotFound(selector.base_dir);
+    }
+    return results;
+  }
+  if (!base_dir->is_dir()) {
+    return NotADir(selector.base_dir);
+  }
+
+  impl_->GatherInfos(selector, selector.base_dir, base_dir->as_dir(),
+                     /*nesting_depth=*/0, &results);
+  return results;
+}
+
+namespace {
+
+// Helper for binary operations (move, copy)
+//
+// Bundles everything an operation on a (source, destination) pair needs:
+// the split paths, the two parent directories, the final path components,
+// and the entries currently stored under those names (nullptr if absent).
+struct BinaryOp {
+ std::vector<std::string> src_parts;
+ std::vector<std::string> dest_parts;
+ Directory& src_dir;
+ Directory& dest_dir;
+ std::string src_name;
+ std::string dest_name;
+ Entry* src_entry;
+ Entry* dest_entry;
+
+ // Validate both paths, take the filesystem lock, resolve both parent
+ // directories, then invoke `op_func` with the populated BinaryOp and return
+ // its status. The lock is held for the whole duration of `op_func`.
+ template <typename OpFunc>
+ static Status Run(MockFileSystem::Impl* impl, const std::string& src,
+ const std::string& dest, OpFunc&& op_func) {
+ auto src_parts = SplitAbstractPath(src);
+ auto dest_parts = SplitAbstractPath(dest);
+ RETURN_NOT_OK(ValidateAbstractPathParts(src_parts));
+ RETURN_NOT_OK(ValidateAbstractPathParts(dest_parts));
+
+ auto guard = impl->lock_guard();
+
+ // Both source and destination must have valid parents
+ Entry* src_parent = impl->FindParent(src_parts);
+ if (src_parent == nullptr || !src_parent->is_dir()) {
+ return PathNotFound(src);
+ }
+ Entry* dest_parent = impl->FindParent(dest_parts);
+ if (dest_parent == nullptr || !dest_parent->is_dir()) {
+ return PathNotFound(dest);
+ }
+ Directory& src_dir = src_parent->as_dir();
+ Directory& dest_dir = dest_parent->as_dir();
+ // FindParent() returns nullptr for an empty path, so both are non-empty here
+ DCHECK_GE(src_parts.size(), 1);
+ DCHECK_GE(dest_parts.size(), 1);
+ // These are references to the last vector elements. The vectors are moved
+ // into `op` below; moving a std::vector hands over its element storage,
+ // so the references stay usable for the later initializers.
+ const auto& src_name = src_parts.back();
+ const auto& dest_name = dest_parts.back();
+
+ BinaryOp op{std::move(src_parts),
+ std::move(dest_parts),
+ src_dir,
+ dest_dir,
+ src_name,
+ dest_name,
+ src_dir.Find(src_name),
+ dest_dir.Find(dest_name)};
+
+ return op_func(std::move(op));
+ }
+};
+
+} // namespace
+
+// Move (rename) the entry at `src` to `dest`.
+//
+// Fails if `src` does not exist, if `dest` is an existing directory, if a
+// directory would replace an existing file, or if `dest` lies below `src`.
+// An existing file at `dest` is otherwise replaced.
+Status MockFileSystem::Move(const std::string& src, const std::string& dest) {
+ return BinaryOp::Run(impl_.get(), src, dest, [&](const BinaryOp& op) -> Status {
+ if (op.src_entry == nullptr) {
+ return PathNotFound(src);
+ }
+ if (op.dest_entry != nullptr) {
+ if (op.dest_entry->is_dir()) {
+ return Status::IOError("Cannot replace destination '", dest,
+ "', which is a directory");
+ }
+ if (op.dest_entry->is_file() && op.src_entry->is_dir()) {
+ return Status::IOError("Cannot replace destination '", dest,
+ "', which is a file, with directory '", src, "'");
+ }
+ }
+ if (op.src_parts.size() < op.dest_parts.size()) {
+ // Check if dest is a child of src
+ // (i.e. whether all components of src are a prefix of dest)
+ auto p =
+ std::mismatch(op.src_parts.begin(), op.src_parts.end(), op.dest_parts.begin());
+ if (p.first == op.src_parts.end()) {
+ return Status::IOError("Cannot move '", src, "' into child path '", dest, "'");
+ }
+ }
+
+ // Move original entry, fix its name
+ // The contents are moved into a fresh Entry before the old slot is
+ // erased; only then is the new entry stored under the destination name.
+ std::unique_ptr<Entry> new_entry(new Entry(std::move(*op.src_entry)));
+ new_entry->SetName(op.dest_name);
+ bool deleted = op.src_dir.DeleteEntry(op.src_name);
+ DCHECK(deleted);
+ op.dest_dir.AssignEntry(op.dest_name, std::move(new_entry));
+ return Status::OK();
+ });
+}
+
+// Copy the file at `src` to `dest`, replacing any existing file there.
+// Fails if `src` is missing or not a file, or if `dest` is a directory.
+Status MockFileSystem::CopyFile(const std::string& src, const std::string& dest) {
+  return BinaryOp::Run(impl_.get(), src, dest, [&](const BinaryOp& op) -> Status {
+    Entry* source = op.src_entry;
+    if (source == nullptr) {
+      return PathNotFound(src);
+    }
+    if (!source->is_file()) {
+      return NotAFile(src);
+    }
+    const bool dest_is_dir = op.dest_entry != nullptr && op.dest_entry->is_dir();
+    if (dest_is_dir) {
+      return Status::IOError("Cannot replace destination '", dest,
+                             "', which is a directory");
+    }
+
+    // Copy original entry, fix its name
+    File copy(source->as_file());
+    copy.name = op.dest_name;
+    op.dest_dir.AssignEntry(op.dest_name,
+                            std::unique_ptr<Entry>(new Entry(std::move(copy))));
+    return Status::OK();
+  });
+}
+
+// Open the file at `path` for sequential reading (takes the filesystem lock).
+Result<std::shared_ptr<io::InputStream>> MockFileSystem::OpenInputStream(
+ const std::string& path) {
+ auto guard = impl_->lock_guard();
+
+ return impl_->OpenInputReader(path);
+}
+
+// Open the file at `path` for random-access reading (takes the filesystem lock).
+Result<std::shared_ptr<io::RandomAccessFile>> MockFileSystem::OpenInputFile(
+ const std::string& path) {
+ auto guard = impl_->lock_guard();
+
+ return impl_->OpenInputReader(path);
+}
+
+// Open `path` for writing without carrying over existing contents, and
+// attach `metadata` to the file.
+Result<std::shared_ptr<io::OutputStream>> MockFileSystem::OpenOutputStream(
+ const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+ auto guard = impl_->lock_guard();
+
+ return impl_->OpenOutputStream(path, /*append=*/false, metadata);
+}
+
+// Open `path` for writing with the existing contents carried over, and
+// attach `metadata` to the file.
+Result<std::shared_ptr<io::OutputStream>> MockFileSystem::OpenAppendStream(
+ const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
+ auto guard = impl_->lock_guard();
+
+ return impl_->OpenOutputStream(path, /*append=*/true, metadata);
+}
+
+// Return every directory in the filesystem; the unnamed root itself is not
+// included.
+std::vector<MockDirInfo> MockFileSystem::AllDirs() {
+ auto guard = impl_->lock_guard();
+
+ std::vector<MockDirInfo> result;
+ impl_->DumpDirs("", impl_->RootDir(), &result);
+ return result;
+}
+
+// Return every file in the filesystem. Each result's `data` is a view into
+// the file's buffer, not a copy.
+std::vector<MockFileInfo> MockFileSystem::AllFiles() {
+ auto guard = impl_->lock_guard();
+
+ std::vector<MockFileInfo> result;
+ impl_->DumpFiles("", impl_->RootDir(), &result);
+ return result;
+}
+
+// Create (or overwrite) the file at `path` with the given contents. The
+// parent directory is created first, passing `recursive` on to CreateDir().
+Status MockFileSystem::CreateFile(const std::string& path, util::string_view contents,
+                                  bool recursive) {
+  const std::string parent_path = fs::internal::GetAbstractPathParent(path).first;
+  if (!parent_path.empty()) {
+    RETURN_NOT_OK(CreateDir(parent_path, recursive));
+  }
+
+  ARROW_ASSIGN_OR_RAISE(auto stream, OpenOutputStream(path));
+  RETURN_NOT_OK(stream->Write(contents));
+  return stream->Close();
+}
+
+// Build a mock filesystem containing an empty file or a directory for each
+// entry of `infos`; parent directories are created as needed. Entries of
+// any other type are ignored.
+Result<std::shared_ptr<FileSystem>> MockFileSystem::Make(
+    TimePoint current_time, const std::vector<FileInfo>& infos) {
+  auto fs = std::make_shared<MockFileSystem>(current_time);
+  for (const auto& info : infos) {
+    const auto type = info.type();
+    if (type == FileType::Directory) {
+      RETURN_NOT_OK(fs->CreateDir(info.path(), /*recursive*/ true));
+    } else if (type == FileType::File) {
+      RETURN_NOT_OK(fs->CreateFile(info.path(), "", /*recursive*/ true));
+    }
+  }
+
+  return fs;
+}
+
+// Run the synchronous listing, then expose it as a generator yielding one
+// single-element chunk per entry. A failed listing becomes a failing
+// generator.
+FileInfoGenerator MockAsyncFileSystem::GetFileInfoGenerator(const FileSelector& select) {
+  auto maybe_infos = GetFileInfo(select);
+  if (!maybe_infos.ok()) {
+    return MakeFailingGenerator(maybe_infos);
+  }
+
+  // Return the FileInfo entries one by one
+  const auto& infos = *maybe_infos;
+  std::vector<FileInfoVector> chunks;
+  chunks.reserve(infos.size());
+  for (const FileInfo& info : infos) {
+    chunks.push_back(FileInfoVector{info});
+  }
+  return MakeVectorGenerator(std::move(chunks));
+}
+
+} // namespace internal
+} // namespace fs
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/mockfs.h b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/mockfs.h
new file mode 100644
index 0000000000..378f30d295
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/mockfs.h
@@ -0,0 +1,132 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <iosfwd>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/windows_fixup.h"
+
+namespace arrow {
+namespace fs {
+namespace internal {
+
+/// Description of a directory, as returned by MockFileSystem::AllDirs().
+struct MockDirInfo {
+ std::string full_path;
+ TimePoint mtime;
+
+ bool operator==(const MockDirInfo& other) const {
+ return mtime == other.mtime && full_path == other.full_path;
+ }
+
+ friend ARROW_EXPORT std::ostream& operator<<(std::ostream&, const MockDirInfo&);
+};
+
+/// Description of a file, as returned by MockFileSystem::AllFiles().
+struct MockFileInfo {
+ std::string full_path;
+ TimePoint mtime;
+ /// Non-owning view of the file contents.
+ util::string_view data;
+
+ bool operator==(const MockFileInfo& other) const {
+ return mtime == other.mtime && full_path == other.full_path && data == other.data;
+ }
+
+ friend ARROW_EXPORT std::ostream& operator<<(std::ostream&, const MockFileInfo&);
+};
+
+/// A mock FileSystem implementation that holds its contents in memory.
+///
+/// Useful for validating the FileSystem API, writing conformance suite,
+/// and bootstrapping FileSystem-based APIs.
+class ARROW_EXPORT MockFileSystem : public FileSystem {
+ public:
+ /// \param current_time timestamp given to entries that are created or rewritten
+ explicit MockFileSystem(TimePoint current_time,
+ const io::IOContext& = io::default_io_context());
+ ~MockFileSystem() override;
+
+ std::string type_name() const override { return "mock"; }
+
+ bool Equals(const FileSystem& other) const override;
+
+ // XXX It's not very practical to have to explicitly declare inheritance
+ // of default overrides.
+ using FileSystem::GetFileInfo;
+ Result<FileInfo> GetFileInfo(const std::string& path) override;
+ Result<std::vector<FileInfo>> GetFileInfo(const FileSelector& select) override;
+
+ Status CreateDir(const std::string& path, bool recursive = true) override;
+
+ Status DeleteDir(const std::string& path) override;
+ Status DeleteDirContents(const std::string& path) override;
+ Status DeleteRootDirContents() override;
+
+ Status DeleteFile(const std::string& path) override;
+
+ Status Move(const std::string& src, const std::string& dest) override;
+
+ Status CopyFile(const std::string& src, const std::string& dest) override;
+
+ Result<std::shared_ptr<io::InputStream>> OpenInputStream(
+ const std::string& path) override;
+ Result<std::shared_ptr<io::RandomAccessFile>> OpenInputFile(
+ const std::string& path) override;
+ Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
+ const std::string& path,
+ const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+ Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
+ const std::string& path,
+ const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
+
+ // Contents-dumping helpers to ease testing.
+ // Output is lexicographically-ordered by full path.
+ std::vector<MockDirInfo> AllDirs();
+ std::vector<MockFileInfo> AllFiles();
+
+ // Create a File with a content from a string.
+ // The parent directory is created first; `recursive` is passed to CreateDir.
+ Status CreateFile(const std::string& path, util::string_view content,
+ bool recursive = true);
+
+ // Create a MockFileSystem out of (empty) FileInfo. The content of every
+ // file is empty and of size 0. All directories will be created recursively.
+ static Result<std::shared_ptr<FileSystem>> Make(TimePoint current_time,
+ const std::vector<FileInfo>& infos);
+
+ // Implementation details; defined in mockfs.cc.
+ class Impl;
+
+ protected:
+ std::unique_ptr<Impl> impl_;
+};
+
+/// A MockFileSystem that clears `default_async_is_sync_` and provides its own
+/// GetFileInfoGenerator().
+class ARROW_EXPORT MockAsyncFileSystem : public MockFileSystem {
+ public:
+ explicit MockAsyncFileSystem(TimePoint current_time,
+ const io::IOContext& io_context = io::default_io_context())
+ : MockFileSystem(current_time, io_context) {
+ default_async_is_sync_ = false;
+ }
+
+ /// Yields the listing for `select` one FileInfo at a time.
+ FileInfoGenerator GetFileInfoGenerator(const FileSelector& select) override;
+};
+
+} // namespace internal
+} // namespace fs
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/path_util.cc b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/path_util.cc
new file mode 100644
index 0000000000..f1bd5c087b
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/path_util.cc
@@ -0,0 +1,271 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <algorithm>
+
+#include "arrow/filesystem/path_util.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+
+namespace arrow {
+namespace fs {
+namespace internal {
+
+// XXX How does this encode Windows UNC paths?
+
+ // Split `path` on kSep into its components.
+ //
+ // At most one trailing separator and then at most one leading separator are
+ // dropped before splitting; any further separators produce empty components.
+ // A path that is empty after stripping yields an empty vector.
+ std::vector<std::string> SplitAbstractPath(const std::string& path) {
+   util::string_view remaining(path);
+   if (!remaining.empty() && remaining.back() == kSep) {
+     remaining.remove_suffix(1);
+   }
+   if (!remaining.empty() && remaining.front() == kSep) {
+     remaining.remove_prefix(1);
+   }
+
+   std::vector<std::string> components;
+   if (remaining.empty()) {
+     return components;
+   }
+
+   size_t begin = 0;
+   for (;;) {
+     const size_t sep_pos = remaining.find_first_of(kSep, begin);
+     if (sep_pos == std::string::npos) {
+       // Last component: everything from `begin` to the end of the view.
+       components.push_back(std::string(remaining.substr(begin)));
+       break;
+     }
+     components.push_back(std::string(remaining.substr(begin, sep_pos - begin)));
+     begin = sep_pos + 1;
+   }
+   return components;
+ }
+
+ // Return {parent, basename} by cutting `s` at its last separator.
+ // Without any separator the parent is empty and the basename is `s` itself.
+ std::pair<std::string, std::string> GetAbstractPathParent(const std::string& s) {
+   // XXX should strip trailing slash?
+
+   const auto sep_pos = s.rfind(kSep);
+   if (sep_pos != std::string::npos) {
+     return {s.substr(0, sep_pos), s.substr(sep_pos + 1)};
+   }
+   // Empty parent
+   return {std::string(), s};
+ }
+
+ // Return the text following the last '.' in the final path component, or an
+ // empty string when that component contains no '.'.
+ std::string GetAbstractPathExtension(const std::string& s) {
+   util::string_view tail(s);
+   const auto sep_pos = tail.find_last_of(kSep);
+   if (sep_pos != std::string::npos) {
+     // Keeps the separator itself at the front; it cannot be mistaken for '.'.
+     tail.remove_prefix(sep_pos);
+   }
+   const auto dot_pos = tail.find_last_of('.');
+   if (dot_pos != util::string_view::npos) {
+     return std::string(tail.substr(dot_pos + 1));
+   }
+   // Empty extension
+   return "";
+ }
+
+ // Check that every component is non-empty and free of separators.
+ // Returns Status::Invalid for the first offending component, else Status::OK.
+ Status ValidateAbstractPathParts(const std::vector<std::string>& parts) {
+   for (auto it = parts.begin(); it != parts.end(); ++it) {
+     if (it->empty()) {
+       return Status::Invalid("Empty path component");
+     }
+     const bool has_separator = it->find(kSep) != std::string::npos;
+     if (has_separator) {
+       return Status::Invalid("Separator in component '", *it, "'");
+     }
+   }
+   return Status::OK();
+ }
+
+ // Append `stem` (must be non-empty) to `base`, with exactly one separator
+ // between them. An empty `base` returns `stem` unchanged.
+ std::string ConcatAbstractPath(const std::string& base, const std::string& stem) {
+   DCHECK(!stem.empty());
+   if (!base.empty()) {
+     std::string joined = EnsureTrailingSlash(base);
+     joined += std::string(RemoveLeadingSlash(stem));
+     return joined;
+   }
+   return stem;
+ }
+
+ // Copy `v`, appending a separator when `v` is non-empty and does not already
+ // end with one. An empty input stays empty.
+ std::string EnsureTrailingSlash(util::string_view v) {
+   std::string result(v);
+   // XXX How about "C:" on Windows? We probably don't want to turn it into "C:/"...
+   // Unless the local filesystem always uses absolute paths
+   if (!result.empty() && result.back() != kSep) {
+     result += kSep;
+   }
+   return result;
+ }
+
+ // Copy `v`, prepending a separator when `v` is empty or does not already
+ // start with one.
+ std::string EnsureLeadingSlash(util::string_view v) {
+   if (!v.empty() && v.front() == kSep) {
+     return std::string(v);
+   }
+   // XXX How about "C:" on Windows? We probably don't want to turn it into "/C:"...
+   std::string result(1, kSep);
+   result += std::string(v);
+   return result;
+ }
+ // Return a view of `key` with every trailing separator removed.
+ util::string_view RemoveTrailingSlash(util::string_view key) {
+   size_t keep = key.size();
+   while (keep > 0 && key[keep - 1] == kSep) {
+     --keep;
+   }
+   return key.substr(0, keep);
+ }
+
+ // Return a view of `key` with every leading separator removed.
+ util::string_view RemoveLeadingSlash(util::string_view key) {
+   size_t skip = 0;
+   while (skip < key.size() && key[skip] == kSep) {
+     ++skip;
+   }
+   return key.substr(skip);
+ }
+
+ // Express `path` relative to the absolute directory `base`.
+ //
+ // Fails with Status::Invalid when `base` does not start with a separator, or
+ // when `path` is not located at or under `base` on a component boundary.
+ Result<std::string> MakeAbstractPathRelative(const std::string& base,
+                                              const std::string& path) {
+   const bool base_is_absolute = !base.empty() && base.front() == kSep;
+   if (!base_is_absolute) {
+     return Status::Invalid("MakeAbstractPathRelative called with non-absolute base '",
+                            base, "'");
+   }
+   const auto not_relative = [&path, &base]() {
+     return Status::Invalid("Path '", path, "' is not relative to '", base, "'");
+   };
+
+   // Normalized base: leading separator, no trailing ones ("/" stays "/").
+   const std::string prefix = EnsureLeadingSlash(RemoveTrailingSlash(base));
+   util::string_view rest(path);
+   if (rest.substr(0, prefix.size()) != util::string_view(prefix)) {
+     return not_relative();
+   }
+   rest = rest.substr(prefix.size());
+
+   // The match must end on a component boundary: "/ab" is not under "/a".
+   const bool on_boundary =
+       rest.empty() || rest.front() == kSep || prefix.back() == kSep;
+   if (!on_boundary) {
+     return not_relative();
+   }
+   return std::string(RemoveLeadingSlash(rest));
+ }
+
+ // Tell whether `ancestor` is `descendant` itself or one of its parent
+ // directories. Trailing separators on either argument are ignored, and an
+ // empty ancestor (the root directory) is an ancestor of everything.
+ bool IsAncestorOf(util::string_view ancestor, util::string_view descendant) {
+   ancestor = RemoveTrailingSlash(ancestor);
+   if (ancestor.empty()) {
+     // everything is a descendant of the root directory
+     return true;
+   }
+
+   descendant = RemoveTrailingSlash(descendant);
+   if (!descendant.starts_with(ancestor)) {
+     // an ancestor path is a prefix of descendant paths
+     return false;
+   }
+
+   const auto remainder = descendant.substr(ancestor.size());
+   // An empty remainder means both paths are equal ("/hello" vs "/hello").
+   // Otherwise the prefix must end on a separator: "/hello/w" is not an
+   // ancestor of "/hello/world".
+   return remainder.empty() || remainder.front() == kSep;
+ }
+
+ // Strip `ancestor` from the front of `descendant`.
+ //
+ // Returns nullopt when IsAncestorOf(ancestor, descendant) is false. Otherwise
+ // returns the part of `descendant` that follows the ancestor, with leading
+ // separators removed (empty when both designate the same path).
+ util::optional<util::string_view> RemoveAncestor(util::string_view ancestor,
+                                                  util::string_view descendant) {
+   if (!IsAncestorOf(ancestor, descendant)) {
+     return util::nullopt;
+   }
+
+   // IsAncestorOf() ignores trailing separators on `ancestor`, so the prefix
+   // to drop must be measured on the stripped ancestor as well. Using the raw
+   // length would run past the end of `descendant` for ("a/b//", "a/b") and
+   // would eat the first character of "a/b" when the ancestor is "/".
+   const auto prefix_length = RemoveTrailingSlash(ancestor).size();
+   return RemoveLeadingSlash(descendant.substr(prefix_length));
+ }
+
+ // List the directories strictly between `base_path` and `descendant`, from
+ // shallowest to deepest, e.g. ("a/b", "a/b/c/d/e") -> ["a/b/c", "a/b/c/d"].
+ //
+ // Returns an empty vector when `base_path` is not an ancestor of `descendant`,
+ // when both designate the same path, or when `descendant` is a direct child.
+ std::vector<std::string> AncestorsFromBasePath(util::string_view base_path,
+                                                util::string_view descendant) {
+   std::vector<std::string> ancestry;
+   const auto relative = RemoveAncestor(base_path, descendant);
+   if (!relative) {
+     return ancestry;
+   }
+
+   auto relative_segments = fs::internal::SplitAbstractPath(std::string(*relative));
+   if (relative_segments.size() < 2) {
+     // Zero segments: descendant is the base path itself. Calling pop_back()
+     // on the empty vector would be undefined behaviour.
+     // One segment: descendant is a direct child, so no parent is missing.
+     return ancestry;
+   }
+
+   // the last segment indicates descendant
+   relative_segments.pop_back();
+
+   ancestry.reserve(relative_segments.size());
+   for (auto& relative_segment : relative_segments) {
+     ancestry.push_back(JoinAbstractPath(
+         std::vector<std::string>{std::string(base_path), std::move(relative_segment)}));
+     // Each new ancestor becomes the base for the next, deeper one.
+     base_path = ancestry.back();
+   }
+   return ancestry;
+ }
+
+ // Reduce `dirs` by dropping entries that are ancestors of (or equal to) the
+ // entry that follows them once the list is sorted.
+ //
+ // NOTE(review): only elements that end up adjacent after the lexicographic sort
+ // are compared. A sibling that sorts between a parent and its child (e.g.
+ // "a", "a-b", "a/b", since '-' orders before '/') appears to leave the parent
+ // in the result -- confirm whether callers rely on strict minimality.
+ std::vector<std::string> MinimalCreateDirSet(std::vector<std::string> dirs) {
+ std::sort(dirs.begin(), dirs.end());
+
+ for (auto ancestor = dirs.begin(); ancestor != dirs.end(); ++ancestor) {
+ auto descendant = ancestor;
+ auto descendants_end = descendant + 1;
+
+ // Extend the chain while each element is an ancestor of the one after it.
+ while (descendants_end != dirs.end() && IsAncestorOf(*descendant, *descendants_end)) {
+ ++descendant;
+ ++descendants_end;
+ }
+
+ // Erase every element of the chain except its last (deepest) one. erase()
+ // returns an iterator to that survivor, which the loop increment then skips.
+ ancestor = dirs.erase(ancestor, descendants_end - 1);
+ }
+
+ // the root directory need not be created
+ if (dirs.size() == 1 && IsAncestorOf(dirs[0], "")) {
+ return {};
+ }
+
+ return dirs;
+ }
+
+ // Return a copy of `v` in which every '/' is replaced by '\\'.
+ std::string ToBackslashes(util::string_view v) {
+   std::string converted(v);
+   std::replace(converted.begin(), converted.end(), '/', '\\');
+   return converted;
+ }
+
+ // Return a copy of `v`; when built with _WIN32 defined, every '\\' is
+ // replaced by '/'. On other builds the copy is returned unchanged.
+ std::string ToSlashes(util::string_view v) {
+   std::string converted(v);
+ #ifdef _WIN32
+   std::replace(converted.begin(), converted.end(), '\\', '/');
+ #endif
+   return converted;
+ }
+
+ // True when `v` contains nothing but '/' characters (an empty view included).
+ bool IsEmptyPath(util::string_view v) {
+   return std::all_of(v.begin(), v.end(), [](char c) { return c == '/'; });
+ }
+
+} // namespace internal
+} // namespace fs
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/path_util.h b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/path_util.h
new file mode 100644
index 0000000000..5701c11b5d
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/path_util.h
@@ -0,0 +1,130 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "arrow/type_fwd.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/string_view.h"
+
+namespace arrow {
+namespace fs {
+namespace internal {
+
+constexpr char kSep = '/';
+
+// Computations on abstract paths (not local paths with system-dependent behaviour).
+// Abstract paths are typically used in URIs.
+
+// Split an abstract path into its individual components.
+ARROW_EXPORT
+std::vector<std::string> SplitAbstractPath(const std::string& s);
+
+// Return the extension of the file
+ARROW_EXPORT
+std::string GetAbstractPathExtension(const std::string& s);
+
+// Return the parent directory and basename of an abstract path. Both values may be
+// empty.
+ARROW_EXPORT
+std::pair<std::string, std::string> GetAbstractPathParent(const std::string& s);
+
+// Validate the components of an abstract path.
+ARROW_EXPORT
+Status ValidateAbstractPathParts(const std::vector<std::string>& parts);
+
+// Append a non-empty stem to an abstract path.
+ARROW_EXPORT
+std::string ConcatAbstractPath(const std::string& base, const std::string& stem);
+
+// Make path relative to base, if it starts with base. Otherwise error out.
+ARROW_EXPORT
+Result<std::string> MakeAbstractPathRelative(const std::string& base,
+ const std::string& path);
+
+ // Return a copy of `s` that starts with a separator; one is prepended when `s`
+ // is empty or does not already begin with one.
+ ARROW_EXPORT
+ std::string EnsureLeadingSlash(util::string_view s);
+
+ // Return a view of `s` with all leading separators removed.
+ ARROW_EXPORT
+ util::string_view RemoveLeadingSlash(util::string_view s);
+
+ // Return a copy of `s` with a separator appended when `s` is non-empty and does
+ // not already end with one. An empty `s` is returned as an empty string.
+ ARROW_EXPORT
+ std::string EnsureTrailingSlash(util::string_view s);
+
+ // Return a view of `s` with all trailing separators removed.
+ ARROW_EXPORT
+ util::string_view RemoveTrailingSlash(util::string_view s);
+
+ // Return true if, ignoring trailing separators, `ancestor` is empty, equals
+ // `descendant`, or is a prefix of `descendant` that ends on a separator boundary.
+ ARROW_EXPORT
+ bool IsAncestorOf(util::string_view ancestor, util::string_view descendant);
+
+ // Return nullopt when IsAncestorOf(ancestor, descendant) is false; otherwise the
+ // remainder of `descendant` after the ancestor, without leading separators.
+ ARROW_EXPORT
+ util::optional<util::string_view> RemoveAncestor(util::string_view ancestor,
+ util::string_view descendant);
+
+/// Return a vector of ancestors between a base path and a descendant.
+/// For example,
+///
+/// AncestorsFromBasePath("a/b", "a/b/c/d/e") -> ["a/b/c", "a/b/c/d"]
+ARROW_EXPORT
+std::vector<std::string> AncestorsFromBasePath(util::string_view base_path,
+ util::string_view descendant);
+
+ /// Given a vector of paths of directories which must be created, produce the minimal
+ /// subset for passing to CreateDir(recursive=true) by removing redundant parent
+ /// directories.
+ARROW_EXPORT
+std::vector<std::string> MinimalCreateDirSet(std::vector<std::string> dirs);
+
+ // Join the components of an abstract path.
+ // Empty components are skipped; the remaining ones are concatenated with a
+ // single separator between consecutive components.
+ template <class StringIt>
+ std::string JoinAbstractPath(StringIt it, StringIt end) {
+   std::string joined;
+   while (it != end) {
+     if (!it->empty()) {
+       if (!joined.empty()) {
+         joined += kSep;
+       }
+       joined += *it;
+     }
+     ++it;
+   }
+   return joined;
+ }
+
+ // Range overload: joins every element from range.begin() to range.end().
+ template <class StringRange>
+ std::string JoinAbstractPath(const StringRange& range) {
+   auto first = range.begin();
+   auto last = range.end();
+   return JoinAbstractPath(first, last);
+ }
+
+/// Convert slashes to backslashes, on all platforms. Mostly useful for testing.
+ARROW_EXPORT
+std::string ToBackslashes(util::string_view s);
+
+/// Ensure a local path is abstract, by converting backslashes to regular slashes
+/// on Windows. Return the path unchanged on other systems.
+ARROW_EXPORT
+std::string ToSlashes(util::string_view s);
+
+ /// Return true if `s` consists solely of '/' characters (including when it is
+ /// empty).
+ ARROW_EXPORT
+ bool IsEmptyPath(util::string_view s);
+
+} // namespace internal
+} // namespace fs
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/type_fwd.h b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/type_fwd.h
new file mode 100644
index 0000000000..112563577d
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/type_fwd.h
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+ #pragma once
+
+ #include <cstdint>
+
+namespace arrow {
+namespace fs {
+
+ /// \brief FileSystem entry type
+ ///
+ /// The underlying type is int8_t. No enumerator has an explicit value, so they
+ /// are numbered 0 through 3 in declaration order.
+ enum class FileType : int8_t {
+ /// Entry is not found
+ NotFound,
+ /// Entry exists but its type is unknown
+ ///
+ /// This can designate a special file such as a Unix socket or character
+ /// device, or Windows NUL / CON / ...
+ Unknown,
+ /// Entry is a regular file
+ File,
+ /// Entry is a directory
+ Directory
+ };
+
+ // Forward declarations only; none of these types is defined in this header.
+ struct FileInfo;
+
+ struct FileSelector;
+
+ class FileSystem;
+ class SubTreeFileSystem;
+ class SlowFileSystem;
+ class LocalFileSystem;
+ class S3FileSystem;
+
+} // namespace fs
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/util_internal.cc b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/util_internal.cc
new file mode 100644
index 0000000000..8f86707375
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/util_internal.cc
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/filesystem/util_internal.h"
+#include "arrow/buffer.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+
+namespace arrow {
+namespace fs {
+namespace internal {
+
+ // Current std::chrono::system_clock time, converted to TimePoint's duration.
+ TimePoint CurrentTimePoint() {
+   const auto since_epoch = std::chrono::system_clock::now().time_since_epoch();
+   return TimePoint(std::chrono::duration_cast<TimePoint::duration>(since_epoch));
+ }
+
+ // Copy `src` into `dest` through a scratch buffer of `chunk_size` bytes taken
+ // from `io_context.pool()`. Copying stops at the first read that returns zero
+ // bytes; any allocation, read or write error is returned immediately.
+ Status CopyStream(const std::shared_ptr<io::InputStream>& src,
+                   const std::shared_ptr<io::OutputStream>& dest, int64_t chunk_size,
+                   const io::IOContext& io_context) {
+   ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBuffer(chunk_size, io_context.pool()));
+
+   int64_t n_read = 0;
+   do {
+     ARROW_ASSIGN_OR_RAISE(n_read, src->Read(chunk_size, buffer->mutable_data()));
+     if (n_read != 0) {
+       RETURN_NOT_OK(dest->Write(buffer->data(), n_read));
+     }
+     // A zero-byte read signals EOF.
+   } while (n_read != 0);
+
+   return Status::OK();
+ }
+
+ // Build the IOError status reported for a path that does not exist.
+ Status PathNotFound(const std::string& path) {
+ return Status::IOError("Path does not exist '", path, "'");
+ }
+
+ // Build the IOError status reported when `path` is not a directory.
+ Status NotADir(const std::string& path) {
+ return Status::IOError("Not a directory: '", path, "'");
+ }
+
+ // Build the IOError status reported when `path` is not a regular file.
+ Status NotAFile(const std::string& path) {
+ return Status::IOError("Not a regular file: '", path, "'");
+ }
+
+ // Build the Invalid status reported when DeleteDirContents is called on a path
+ // it rejects; the message points callers at DeleteRootDirContents.
+ Status InvalidDeleteDirContents(const std::string& path) {
+ return Status::Invalid(
+ "DeleteDirContents called on invalid path '", path, "'. ",
+ "If you wish to delete the root directory's contents, call DeleteRootDirContents.");
+ }
+
+ // Definition of the object declared `extern` in util_internal.h.
+ FileSystemGlobalOptions global_options;
+
+} // namespace internal
+} // namespace fs
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/util_internal.h b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/util_internal.h
new file mode 100644
index 0000000000..915c8d03d4
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/filesystem/util_internal.h
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/filesystem/filesystem.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/status.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace fs {
+namespace internal {
+
+ // Return the current std::chrono::system_clock time as a TimePoint.
+ ARROW_EXPORT
+ TimePoint CurrentTimePoint();
+
+ // Copy `src` to `dest` in reads of `chunk_size` bytes, using a buffer allocated
+ // from `io_context.pool()`, until a read returns zero bytes.
+ ARROW_EXPORT
+ Status CopyStream(const std::shared_ptr<io::InputStream>& src,
+ const std::shared_ptr<io::OutputStream>& dest, int64_t chunk_size,
+ const io::IOContext& io_context);
+
+ // Return an IOError status stating that `path` does not exist.
+ ARROW_EXPORT
+ Status PathNotFound(const std::string& path);
+
+ // Return an IOError status stating that `path` is not a directory.
+ ARROW_EXPORT
+ Status NotADir(const std::string& path);
+
+ // Return an IOError status stating that `path` is not a regular file.
+ ARROW_EXPORT
+ Status NotAFile(const std::string& path);
+
+ // Return an Invalid status for a DeleteDirContents call on a rejected path.
+ ARROW_EXPORT
+ Status InvalidDeleteDirContents(const std::string& path);
+
+ // Defined in util_internal.cc.
+ extern FileSystemGlobalOptions global_options;
+
+} // namespace internal
+} // namespace fs
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/ipc/json_simple.cc b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/json_simple.cc
new file mode 100644
index 0000000000..117b82df30
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/ipc/json_simple.cc
@@ -0,0 +1,940 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <sstream>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "arrow/array/array_dict.h"
+#include "arrow/array/builder_binary.h"
+#include "arrow/array/builder_decimal.h"
+#include "arrow/array/builder_dict.h"
+#include "arrow/array/builder_nested.h"
+#include "arrow/array/builder_primitive.h"
+#include "arrow/array/builder_time.h"
+#include "arrow/array/builder_union.h"
+#include "arrow/ipc/json_simple.h"
+#include "arrow/scalar.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/decimal.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/value_parsing.h"
+
+#include "arrow/json/rapidjson_defs.h"
+
+#include <rapidjson/document.h>
+#include <rapidjson/error/en.h>
+#include <rapidjson/rapidjson.h>
+#include <rapidjson/reader.h>
+#include <rapidjson/writer.h>
+
+namespace rj = arrow::rapidjson;
+
+namespace arrow {
+
+using internal::ParseValue;
+
+namespace ipc {
+namespace internal {
+namespace json {
+
+using ::arrow::internal::checked_cast;
+using ::arrow::internal::checked_pointer_cast;
+
+namespace {
+
+ // RapidJSON parse flags: kParseFullPrecisionFlag combined with
+ // kParseNanAndInfFlag.
+ constexpr auto kParseFlags = rj::kParseFullPrecisionFlag | rj::kParseNanAndInfFlag;
+
+ // Build the Invalid status used when a JSON value has an unexpected type.
+ // `expected_type` is a human-readable name; `json_type` is the RapidJSON type
+ // that was actually encountered and is passed to the message as-is.
+ Status JSONTypeError(const char* expected_type, rj::Type json_type) {
+ return Status::Invalid("Expected ", expected_type, " or null, got JSON type ",
+ json_type);
+ }
+
+ // Abstract base for objects that append JSON values to an ArrayBuilder.
+ class Converter {
+  public:
+   virtual ~Converter() = default;
+
+   // Optional second-stage initialization; the default does nothing.
+   virtual Status Init() { return Status::OK(); }
+
+   // Append a single JSON value.
+   virtual Status AppendValue(const rj::Value& json_obj) = 0;
+
+   // Append a null through the converter's builder.
+   Status AppendNull() { return this->builder()->AppendNull(); }
+
+   // Append every element of a JSON array.
+   virtual Status AppendValues(const rj::Value& json_array) = 0;
+
+   // The builder that receives the appended values.
+   virtual std::shared_ptr<ArrayBuilder> builder() = 0;
+
+   // Finish the builder into `out`.
+   virtual Status Finish(std::shared_ptr<Array>* out) {
+     const std::shared_ptr<ArrayBuilder> array_builder = this->builder();
+     const bool nothing_appended = array_builder->length() == 0;
+     if (nothing_appended) {
+       // Make sure the builder was initialized
+       RETURN_NOT_OK(array_builder->Resize(1));
+     }
+     return array_builder->Finish(out);
+   }
+
+  protected:
+   std::shared_ptr<DataType> type_;
+ };
+
+ // Forward declaration: produces in `out` a converter for the given data type.
+ // NOTE(review): the definition is not in this part of the file; presumably it
+ // appears further down -- confirm.
+ Status GetConverter(const std::shared_ptr<DataType>&, std::shared_ptr<Converter>* out);
+
+ // CRTP
+ // Shared implementation for concrete converters; `Derived` supplies
+ // AppendValue(), which AppendValues() calls through a static downcast.
+ template <class Derived>
+ class ConcreteConverter : public Converter {
+  public:
+   // Append each element of `json_array`, stopping at the first error.
+   Status AppendValues(const rj::Value& json_array) override {
+     if (!json_array.IsArray()) {
+       return JSONTypeError("array", json_array.GetType());
+     }
+     auto* derived = static_cast<Derived*>(this);
+     const auto n_elements = json_array.Size();
+     for (uint32_t index = 0; index < n_elements; ++index) {
+       RETURN_NOT_OK(derived->AppendValue(json_array[index]));
+     }
+     return Status::OK();
+   }
+
+   // The dictionary's value type when `type_` is a dictionary, else `type_`.
+   const std::shared_ptr<DataType>& value_type() {
+     if (type_->id() == Type::DICTIONARY) {
+       return checked_cast<const DictionaryType&>(*type_).value_type();
+     }
+     return type_;
+   }
+
+   // Create a builder for `type_` on the default memory pool and store it in
+   // `out`, cast to the requested builder class.
+   template <typename BuilderType>
+   Status MakeConcreteBuilder(std::shared_ptr<BuilderType>* out) {
+     std::unique_ptr<ArrayBuilder> generic_builder;
+     RETURN_NOT_OK(MakeBuilder(default_memory_pool(), this->type_, &generic_builder));
+     *out = checked_pointer_cast<BuilderType>(std::move(generic_builder));
+     DCHECK(*out);
+     return Status::OK();
+   }
+ };
+
+// ------------------------------------------------------------------------
+// Converter for null arrays
+
+ // Accepts only JSON null values and appends them to a NullBuilder.
+ class NullConverter final : public ConcreteConverter<NullConverter> {
+  public:
+   explicit NullConverter(const std::shared_ptr<DataType>& type) {
+     type_ = type;
+     builder_ = std::make_shared<NullBuilder>();
+   }
+
+   Status AppendValue(const rj::Value& json_obj) override {
+     if (!json_obj.IsNull()) {
+       return JSONTypeError("null", json_obj.GetType());
+     }
+     return AppendNull();
+   }
+
+   std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
+
+  private:
+   std::shared_ptr<NullBuilder> builder_;
+ };
+
+// ------------------------------------------------------------------------
+// Converter for boolean arrays
+
+ // Accepts JSON null, booleans, and integers that satisfy IsInt() (non-zero
+ // means true), appending them to a BooleanBuilder.
+ class BooleanConverter final : public ConcreteConverter<BooleanConverter> {
+  public:
+   explicit BooleanConverter(const std::shared_ptr<DataType>& type) {
+     type_ = type;
+     builder_ = std::make_shared<BooleanBuilder>();
+   }
+
+   Status AppendValue(const rj::Value& json_obj) override {
+     if (json_obj.IsNull()) {
+       return AppendNull();
+     }
+     bool value = false;
+     if (json_obj.IsBool()) {
+       value = json_obj.GetBool();
+     } else if (json_obj.IsInt()) {
+       value = json_obj.GetInt() != 0;
+     } else {
+       return JSONTypeError("boolean", json_obj.GetType());
+     }
+     return builder_->Append(value);
+   }
+
+   std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
+
+  private:
+   std::shared_ptr<BooleanBuilder> builder_;
+ };
+
+// ------------------------------------------------------------------------
+// Helpers for numeric converters
+
+ // Convert single signed integer value (also {Date,Time}{32,64} and Timestamp)
+ // Writes the converted value to `*out`. Fails when the JSON value is not an
+ // int64-representable integer or does not survive narrowing to T::c_type;
+ // `*out` is set to 0 on a type error.
+ template <typename T>
+ enable_if_physical_signed_integer<T, Status> ConvertNumber(const rj::Value& json_obj,
+                                                            const DataType& type,
+                                                            typename T::c_type* out) {
+   using CType = typename T::c_type;
+   if (!json_obj.IsInt64()) {
+     *out = static_cast<CType>(0);
+     return JSONTypeError("signed int", json_obj.GetType());
+   }
+   const int64_t v64 = json_obj.GetInt64();
+   *out = static_cast<CType>(v64);
+   // A round-trip mismatch means the value did not fit in the target type.
+   if (*out != v64) {
+     return Status::Invalid("Value ", v64, " out of bounds for ", type);
+   }
+   return Status::OK();
+ }
+
+ // Convert single unsigned integer value
+ // Writes the converted value to `*out`. Fails when the JSON value is not a
+ // uint64-representable integer or does not survive narrowing to T::c_type;
+ // `*out` is set to 0 on a type error.
+ template <typename T>
+ enable_if_physical_unsigned_integer<T, Status> ConvertNumber(const rj::Value& json_obj,
+                                                              const DataType& type,
+                                                              typename T::c_type* out) {
+   using CType = typename T::c_type;
+   if (!json_obj.IsUint64()) {
+     *out = static_cast<CType>(0);
+     return JSONTypeError("unsigned int", json_obj.GetType());
+   }
+   const uint64_t v64 = json_obj.GetUint64();
+   *out = static_cast<CType>(v64);
+   // A round-trip mismatch means the value did not fit in the target type.
+   if (*out != v64) {
+     return Status::Invalid("Value ", v64, " out of bounds for ", type);
+   }
+   return Status::OK();
+ }
+
+ // Convert single floating point value
+ // Any JSON number is accepted and cast from double to T::c_type; other JSON
+ // types set `*out` to 0 and return a type error.
+ template <typename T>
+ enable_if_physical_floating_point<T, Status> ConvertNumber(const rj::Value& json_obj,
+                                                            const DataType& type,
+                                                            typename T::c_type* out) {
+   using CType = typename T::c_type;
+   if (!json_obj.IsNumber()) {
+     *out = static_cast<CType>(0);
+     return JSONTypeError("number", json_obj.GetType());
+   }
+   *out = static_cast<CType>(json_obj.GetDouble());
+   return Status::OK();
+ }
+
+// ------------------------------------------------------------------------
+// Converter for int arrays
+
+ // Converter that appends JSON integers (or nulls) to a builder of `BuilderType`,
+ // range-checking each value through ConvertNumber<Type>().
+ template <typename Type, typename BuilderType = typename TypeTraits<Type>::BuilderType>
+ class IntegerConverter final
+ : public ConcreteConverter<IntegerConverter<Type, BuilderType>> {
+ using c_type = typename Type::c_type;
+
+ // Not referenced anywhere else in this class.
+ static constexpr auto is_signed = std::is_signed<c_type>::value;
+
+ public:
+ explicit IntegerConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; }
+
+ // Creates the concrete builder for `type_`.
+ Status Init() override { return this->MakeConcreteBuilder(&builder_); }
+
+ // Appends null for JSON null; otherwise converts and appends the number, or
+ // returns the conversion error.
+ Status AppendValue(const rj::Value& json_obj) override {
+ if (json_obj.IsNull()) {
+ return this->AppendNull();
+ }
+ c_type value;
+ RETURN_NOT_OK(ConvertNumber<Type>(json_obj, *this->type_, &value));
+ return builder_->Append(value);
+ }
+
+ std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
+
+ private:
+ std::shared_ptr<BuilderType> builder_;
+ };
+
+// ------------------------------------------------------------------------
+// Converter for float arrays
+
+ // Converter that appends JSON numbers (or nulls) to a builder of `BuilderType`,
+ // converting each value through ConvertNumber<Type>().
+ template <typename Type, typename BuilderType = typename TypeTraits<Type>::BuilderType>
+ class FloatConverter final : public ConcreteConverter<FloatConverter<Type, BuilderType>> {
+ using c_type = typename Type::c_type;
+
+ public:
+ explicit FloatConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; }
+
+ // Creates the concrete builder for `type_`.
+ Status Init() override { return this->MakeConcreteBuilder(&builder_); }
+
+ // Appends null for JSON null; otherwise converts and appends the number, or
+ // returns the conversion error.
+ Status AppendValue(const rj::Value& json_obj) override {
+ if (json_obj.IsNull()) {
+ return this->AppendNull();
+ }
+ c_type value;
+ RETURN_NOT_OK(ConvertNumber<Type>(json_obj, *this->type_, &value));
+ return builder_->Append(value);
+ }
+
+ std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
+
+ private:
+ std::shared_ptr<BuilderType> builder_;
+ };
+
+// ------------------------------------------------------------------------
+// Converter for decimal arrays
+
+ // Converter that parses decimal values from JSON strings. The parsed scale
+ // must equal the scale of the target decimal type.
+ template <typename DecimalSubtype, typename DecimalValue, typename BuilderType>
+ class DecimalConverter final
+     : public ConcreteConverter<
+           DecimalConverter<DecimalSubtype, DecimalValue, BuilderType>> {
+  public:
+   explicit DecimalConverter(const std::shared_ptr<DataType>& type) {
+     this->type_ = type;
+     decimal_type_ = &checked_cast<const DecimalSubtype&>(*this->value_type());
+   }
+
+   // Creates the concrete builder for `type_`.
+   Status Init() override { return this->MakeConcreteBuilder(&builder_); }
+
+   Status AppendValue(const rj::Value& json_obj) override {
+     if (json_obj.IsNull()) {
+       return this->AppendNull();
+     }
+     if (!json_obj.IsString()) {
+       return JSONTypeError("decimal string", json_obj.GetType());
+     }
+     const util::string_view text(json_obj.GetString(), json_obj.GetStringLength());
+     DecimalValue parsed;
+     int32_t precision, scale;
+     RETURN_NOT_OK(DecimalValue::FromString(text, &parsed, &precision, &scale));
+     if (scale != decimal_type_->scale()) {
+       return Status::Invalid("Invalid scale for decimal: expected ",
+                              decimal_type_->scale(), ", got ", scale);
+     }
+     return builder_->Append(parsed);
+   }
+
+   std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
+
+  private:
+   std::shared_ptr<BuilderType> builder_;
+   const DecimalSubtype* decimal_type_;
+ };
+
+ // DecimalConverter specialized for Decimal128Type / Decimal128 values.
+ template <typename BuilderType = typename TypeTraits<Decimal128Type>::BuilderType>
+ using Decimal128Converter = DecimalConverter<Decimal128Type, Decimal128, BuilderType>;
+ // DecimalConverter specialized for Decimal256Type / Decimal256 values.
+ template <typename BuilderType = typename TypeTraits<Decimal256Type>::BuilderType>
+ using Decimal256Converter = DecimalConverter<Decimal256Type, Decimal256, BuilderType>;
+
+// ------------------------------------------------------------------------
+// Converter for timestamp arrays
+
+ // Converter for timestamps given either as JSON numbers (converted through
+ // ConvertNumber<Int64Type>) or as strings parsed with ParseValue().
+ class TimestampConverter final : public ConcreteConverter<TimestampConverter> {
+  public:
+   explicit TimestampConverter(const std::shared_ptr<DataType>& type)
+       : timestamp_type_{checked_cast<const TimestampType*>(type.get())} {
+     this->type_ = type;
+     builder_ = std::make_shared<TimestampBuilder>(type, default_memory_pool());
+   }
+
+   Status AppendValue(const rj::Value& json_obj) override {
+     if (json_obj.IsNull()) {
+       return this->AppendNull();
+     }
+     const bool is_number = json_obj.IsNumber();
+     if (!is_number && !json_obj.IsString()) {
+       return JSONTypeError("timestamp", json_obj.GetType());
+     }
+
+     int64_t value;
+     if (is_number) {
+       RETURN_NOT_OK(ConvertNumber<Int64Type>(json_obj, *this->type_, &value));
+     } else {
+       const util::string_view text(json_obj.GetString(), json_obj.GetStringLength());
+       const bool parsed =
+           ParseValue(*timestamp_type_, text.data(), text.size(), &value);
+       if (!parsed) {
+         return Status::Invalid("couldn't parse timestamp from ", text);
+       }
+     }
+     return builder_->Append(value);
+   }
+
+   std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
+
+  private:
+   const TimestampType* timestamp_type_;
+   std::shared_ptr<TimestampBuilder> builder_;
+ };
+
+// ------------------------------------------------------------------------
+// Converter for day-time interval arrays
+
+ // Converter for day-time intervals written as two-element JSON arrays
+ // [days, milliseconds], each converted through ConvertNumber<Int32Type>.
+ class DayTimeIntervalConverter final
+     : public ConcreteConverter<DayTimeIntervalConverter> {
+  public:
+   explicit DayTimeIntervalConverter(const std::shared_ptr<DataType>& type) {
+     this->type_ = type;
+     builder_ = std::make_shared<DayTimeIntervalBuilder>(default_memory_pool());
+   }
+
+   Status AppendValue(const rj::Value& json_obj) override {
+     if (json_obj.IsNull()) {
+       return this->AppendNull();
+     }
+     if (!json_obj.IsArray()) {
+       return JSONTypeError("array", json_obj.GetType());
+     }
+     const auto n_elements = json_obj.Size();
+     if (n_elements != 2) {
+       return Status::Invalid(
+           "day time interval pair must have exactly two elements, had ", n_elements);
+     }
+
+     DayTimeIntervalType::DayMilliseconds interval;
+     const rj::Value& days_json = json_obj[0];
+     const rj::Value& millis_json = json_obj[1];
+     RETURN_NOT_OK(ConvertNumber<Int32Type>(days_json, *this->type_, &interval.days));
+     RETURN_NOT_OK(
+         ConvertNumber<Int32Type>(millis_json, *this->type_, &interval.milliseconds));
+     return builder_->Append(interval);
+   }
+
+   std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
+
+  private:
+   std::shared_ptr<DayTimeIntervalBuilder> builder_;
+ };
+
+// ------------------------------------------------------------------------
+// Converter for binary and string arrays
+
+ // Converter that appends JSON strings (or nulls) to a builder of `BuilderType`.
+ template <typename Type, typename BuilderType = typename TypeTraits<Type>::BuilderType>
+ class StringConverter final
+     : public ConcreteConverter<StringConverter<Type, BuilderType>> {
+  public:
+   explicit StringConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; }
+
+   // Creates the concrete builder for `type_`.
+   Status Init() override { return this->MakeConcreteBuilder(&builder_); }
+
+   Status AppendValue(const rj::Value& json_obj) override {
+     if (json_obj.IsNull()) {
+       return this->AppendNull();
+     }
+     if (!json_obj.IsString()) {
+       return JSONTypeError("string", json_obj.GetType());
+     }
+     // Use the explicit length so embedded NUL characters are preserved.
+     const util::string_view text(json_obj.GetString(), json_obj.GetStringLength());
+     return builder_->Append(text);
+   }
+
+   std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
+
+  private:
+   std::shared_ptr<BuilderType> builder_;
+ };
+
+// ------------------------------------------------------------------------
+// Converter for fixed-size binary arrays
+
+// Converts JSON strings (or JSON null) into fixed-size binary values; the
+// string length must equal the builder's byte width.
+template <typename BuilderType = typename TypeTraits<FixedSizeBinaryType>::BuilderType>
+class FixedSizeBinaryConverter final
+    : public ConcreteConverter<FixedSizeBinaryConverter<BuilderType>> {
+ public:
+  explicit FixedSizeBinaryConverter(const std::shared_ptr<DataType>& type) {
+    this->type_ = type;
+  }
+
+  Status Init() override { return this->MakeConcreteBuilder(&builder_); }
+
+  // Appends null for JSON null; for a JSON string, validates its length
+  // against byte_width() before appending. Other JSON types are rejected.
+  Status AppendValue(const rj::Value& json_obj) override {
+    if (json_obj.IsNull()) {
+      return this->AppendNull();
+    }
+    if (!json_obj.IsString()) {
+      return JSONTypeError("string", json_obj.GetType());
+    }
+
+    const auto bytes = util::string_view(json_obj.GetString(), json_obj.GetStringLength());
+    const auto expected_width = static_cast<size_t>(builder_->byte_width());
+    if (bytes.length() != expected_width) {
+      return Status::Invalid("Invalid string length ", bytes.length(),
+                             " in JSON input for ", this->type_->ToString());
+    }
+    return builder_->Append(bytes);
+  }
+
+  std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
+
+ private:
+  std::shared_ptr<BuilderType> builder_;
+};
+
+// ------------------------------------------------------------------------
+// Converter for list arrays
+
+// Converts JSON arrays (or JSON null) into list values of list type TYPE,
+// delegating the elements to a converter obtained for the value type.
+template <typename TYPE>
+class ListConverter final : public ConcreteConverter<ListConverter<TYPE>> {
+ public:
+  using BuilderType = typename TypeTraits<TYPE>::BuilderType;
+
+  explicit ListConverter(const std::shared_ptr<DataType>& type) { this->type_ = type; }
+
+  // Creates the child converter, then a list builder wrapping the child's
+  // builder.
+  Status Init() override {
+    const auto& list_type = checked_cast<const TYPE&>(*this->type_);
+    RETURN_NOT_OK(GetConverter(list_type.value_type(), &child_converter_));
+    builder_ = std::make_shared<BuilderType>(
+        default_memory_pool(), child_converter_->builder(), this->type_);
+    return Status::OK();
+  }
+
+  Status AppendValue(const rj::Value& json_obj) override {
+    if (json_obj.IsNull()) {
+      return this->AppendNull();
+    }
+    // Open a new list slot, then let the child converter consume the elements
+    // of this JSON array.
+    RETURN_NOT_OK(builder_->Append());
+    return child_converter_->AppendValues(json_obj);
+  }
+
+  std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
+
+ private:
+  std::shared_ptr<BuilderType> builder_;
+  std::shared_ptr<Converter> child_converter_;
+};
+
+// ------------------------------------------------------------------------
+// Converter for map arrays
+
+// Converts a JSON array of `[key, item]` pairs (or JSON null) into a map
+// value. Keys and items are handled by separate child converters.
+class MapConverter final : public ConcreteConverter<MapConverter> {
+ public:
+  explicit MapConverter(const std::shared_ptr<DataType>& type) { type_ = type; }
+
+  // Creates the key and item converters, then a MapBuilder over their builders.
+  Status Init() override {
+    const auto& map_type = checked_cast<const MapType&>(*type_);
+    RETURN_NOT_OK(GetConverter(map_type.key_type(), &key_converter_));
+    RETURN_NOT_OK(GetConverter(map_type.item_type(), &item_converter_));
+    builder_ = std::make_shared<MapBuilder>(default_memory_pool(),
+                                            key_converter_->builder(),
+                                            item_converter_->builder(), type_);
+    return Status::OK();
+  }
+
+  // Each entry must be a two-element array whose first element (the key) is
+  // not JSON null.
+  Status AppendValue(const rj::Value& json_obj) override {
+    if (json_obj.IsNull()) {
+      return this->AppendNull();
+    }
+    RETURN_NOT_OK(builder_->Append());
+    if (!json_obj.IsArray()) {
+      return JSONTypeError("array", json_obj.GetType());
+    }
+    for (auto pair_it = json_obj.Begin(); pair_it != json_obj.End(); ++pair_it) {
+      const rj::Value& json_pair = *pair_it;
+      if (!json_pair.IsArray()) {
+        return JSONTypeError("array", json_pair.GetType());
+      }
+      const auto pair_size = json_pair.Size();
+      if (pair_size != 2) {
+        return Status::Invalid("key item pair must have exactly two elements, had ",
+                               pair_size);
+      }
+      const auto& json_key = json_pair[0];
+      if (json_key.IsNull()) {
+        return Status::Invalid("null key is invalid");
+      }
+      RETURN_NOT_OK(key_converter_->AppendValue(json_key));
+      RETURN_NOT_OK(item_converter_->AppendValue(json_pair[1]));
+    }
+    return Status::OK();
+  }
+
+  std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
+
+ private:
+  std::shared_ptr<MapBuilder> builder_;
+  std::shared_ptr<Converter> key_converter_, item_converter_;
+};
+
+// ------------------------------------------------------------------------
+// Converter for fixed size list arrays
+
+// Converts JSON arrays (or JSON null) into fixed-size list values; the JSON
+// array length must equal the list size taken from the type.
+class FixedSizeListConverter final : public ConcreteConverter<FixedSizeListConverter> {
+ public:
+  explicit FixedSizeListConverter(const std::shared_ptr<DataType>& type) { type_ = type; }
+
+  // Records the list size and wires a FixedSizeListBuilder to the child
+  // converter's builder.
+  Status Init() override {
+    const auto& list_type = checked_cast<const FixedSizeListType&>(*type_);
+    list_size_ = list_type.list_size();
+    RETURN_NOT_OK(GetConverter(list_type.value_type(), &child_converter_));
+    builder_ = std::make_shared<FixedSizeListBuilder>(
+        default_memory_pool(), child_converter_->builder(), type_);
+    return Status::OK();
+  }
+
+  Status AppendValue(const rj::Value& json_obj) override {
+    if (json_obj.IsNull()) {
+      return this->AppendNull();
+    }
+    RETURN_NOT_OK(builder_->Append());
+    // Feed the elements to the child converter first; the size check runs
+    // only after that call has succeeded.
+    RETURN_NOT_OK(child_converter_->AppendValues(json_obj));
+    const auto actual_size = json_obj.GetArray().Size();
+    const auto expected_size = static_cast<rj::SizeType>(list_size_);
+    if (actual_size == expected_size) {
+      return Status::OK();
+    }
+    return Status::Invalid("incorrect list size ", actual_size);
+  }
+
+  std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
+
+ private:
+  int32_t list_size_;
+  std::shared_ptr<FixedSizeListBuilder> builder_;
+  std::shared_ptr<Converter> child_converter_;
+};
+
+// ------------------------------------------------------------------------
+// Converter for struct arrays
+
+// Converts JSON arrays or objects (or JSON null) into struct values, with one
+// child converter per struct field.
+class StructConverter final : public ConcreteConverter<StructConverter> {
+ public:
+  explicit StructConverter(const std::shared_ptr<DataType>& type) { type_ = type; }
+
+  // Creates one converter per field and a StructBuilder over their builders.
+  Status Init() override {
+    std::vector<std::shared_ptr<ArrayBuilder>> field_builders;
+    for (const auto& field : type_->fields()) {
+      std::shared_ptr<Converter> field_converter;
+      RETURN_NOT_OK(GetConverter(field->type(), &field_converter));
+      child_converters_.push_back(field_converter);
+      field_builders.push_back(field_converter->builder());
+    }
+    builder_ = std::make_shared<StructBuilder>(type_, default_memory_pool(),
+                                               std::move(field_builders));
+    return Status::OK();
+  }
+
+  // Append a JSON value that is either an array of N elements in field order
+  // or an object mapping field names to values (omitted members become null).
+  Status AppendValue(const rj::Value& json_obj) override {
+    if (json_obj.IsNull()) {
+      return this->AppendNull();
+    }
+    if (json_obj.IsArray()) {
+      return AppendFromArray(json_obj);
+    }
+    if (json_obj.IsObject()) {
+      return AppendFromObject(json_obj);
+    }
+    return JSONTypeError("array or object", json_obj.GetType());
+  }
+
+  std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
+
+ private:
+  // Positional form: the array must have exactly one element per field.
+  Status AppendFromArray(const rj::Value& json_array) {
+    const auto actual_size = json_array.Size();
+    const auto expected_size = static_cast<uint32_t>(type_->num_fields());
+    if (actual_size != expected_size) {
+      return Status::Invalid("Expected array of size ", expected_size,
+                             ", got array of size ", actual_size);
+    }
+    for (uint32_t pos = 0; pos < actual_size; ++pos) {
+      RETURN_NOT_OK(child_converters_[pos]->AppendValue(json_array[pos]));
+    }
+    return builder_->Append();
+  }
+
+  // Keyed form: look up each field by name; members matching no field are
+  // reported as an error after all fields have been processed.
+  Status AppendFromObject(const rj::Value& json_object) {
+    auto unmatched = json_object.MemberCount();
+    const auto num_children = type_->num_fields();
+    for (int32_t i = 0; i < num_children; ++i) {
+      auto member = json_object.FindMember(type_->field(i)->name());
+      if (member == json_object.MemberEnd()) {
+        RETURN_NOT_OK(child_converters_[i]->AppendNull());
+        continue;
+      }
+      --unmatched;
+      RETURN_NOT_OK(child_converters_[i]->AppendValue(member->value));
+    }
+    if (unmatched > 0) {
+      // Serialize the offending object so it can be shown in the error.
+      rj::StringBuffer sb;
+      rj::Writer<rj::StringBuffer> writer(sb);
+      json_object.Accept(writer);
+      return Status::Invalid("Unexpected members in JSON object for type ",
+                             type_->ToString(), " Object: ", sb.GetString());
+    }
+    return builder_->Append();
+  }
+
+  std::shared_ptr<StructBuilder> builder_;
+  std::vector<std::shared_ptr<Converter>> child_converters_;
+};
+
+// ------------------------------------------------------------------------
+// Converter for union arrays
+
+// Converts JSON `[type_id, value]` pairs (or JSON null) into sparse or dense
+// union values. One child converter is created per union field.
+class UnionConverter final : public ConcreteConverter<UnionConverter> {
+ public:
+  explicit UnionConverter(const std::shared_ptr<DataType>& type) { type_ = type; }
+
+  // Builds the type_id -> child index lookup table, the child converters and
+  // the dense/sparse union builder matching the union mode.
+  Status Init() override {
+    auto union_type = checked_cast<const UnionType*>(type_.get());
+    mode_ = union_type->mode();
+    // Entries left at -1 mark type ids that the union type does not declare.
+    type_id_to_child_num_.clear();
+    type_id_to_child_num_.resize(union_type->max_type_code() + 1, -1);
+    int child_i = 0;
+    for (auto type_id : union_type->type_codes()) {
+      type_id_to_child_num_[type_id] = static_cast<int8_t>(child_i++);
+    }
+    std::vector<std::shared_ptr<ArrayBuilder>> child_builders;
+    for (const auto& field : type_->fields()) {
+      std::shared_ptr<Converter> child_converter;
+      RETURN_NOT_OK(GetConverter(field->type(), &child_converter));
+      child_converters_.push_back(child_converter);
+      child_builders.push_back(child_converter->builder());
+    }
+    if (mode_ == UnionMode::DENSE) {
+      builder_ = std::make_shared<DenseUnionBuilder>(default_memory_pool(),
+                                                     std::move(child_builders), type_);
+    } else {
+      builder_ = std::make_shared<SparseUnionBuilder>(default_memory_pool(),
+                                                      std::move(child_builders), type_);
+    }
+    return Status::OK();
+  }
+
+  // Append a JSON value that must be a 2-long array, containing the type_id
+  // and value of the UnionArray's slot.
+  //
+  // Returns a type error if the value is not an array or the type_id is not
+  // an int, and Status::Invalid if the array is not of size 2 or the type_id
+  // is not declared by the union type.
+  Status AppendValue(const rj::Value& json_obj) override {
+    if (json_obj.IsNull()) {
+      return this->AppendNull();
+    }
+    if (!json_obj.IsArray()) {
+      return JSONTypeError("array", json_obj.GetType());
+    }
+    if (json_obj.Size() != 2) {
+      return Status::Invalid("Expected [type_id, value] pair, got array of size ",
+                             json_obj.Size());
+    }
+    const auto& id_obj = json_obj[0];
+    if (!id_obj.IsInt()) {
+      return JSONTypeError("int", id_obj.GetType());
+    }
+
+    // Validate the raw integer before narrowing or indexing: a negative or
+    // too-large id would otherwise read outside type_id_to_child_num_ (and a
+    // value such as 300 would silently wrap into the int8_t range).
+    const int raw_id = id_obj.GetInt();
+    if (raw_id < 0 || static_cast<size_t>(raw_id) >= type_id_to_child_num_.size() ||
+        type_id_to_child_num_[static_cast<size_t>(raw_id)] == -1) {
+      // Report the plain int: streaming an int8_t would print it as a character.
+      return Status::Invalid("type_id ", raw_id, " not found in ", *type_);
+    }
+    const auto id = static_cast<int8_t>(raw_id);
+    const auto child_num = type_id_to_child_num_[static_cast<size_t>(raw_id)];
+
+    const auto& child_converter = child_converters_[child_num];
+    if (mode_ == UnionMode::SPARSE) {
+      RETURN_NOT_OK(checked_cast<SparseUnionBuilder&>(*builder_).Append(id));
+      // In sparse mode every other child gets a null appended for this slot.
+      for (auto&& other_converter : child_converters_) {
+        if (other_converter != child_converter) {
+          RETURN_NOT_OK(other_converter->AppendNull());
+        }
+      }
+    } else {
+      RETURN_NOT_OK(checked_cast<DenseUnionBuilder&>(*builder_).Append(id));
+    }
+    return child_converter->AppendValue(json_obj[1]);
+  }
+
+  std::shared_ptr<ArrayBuilder> builder() override { return builder_; }
+
+ private:
+  UnionMode::type mode_;
+  std::shared_ptr<ArrayBuilder> builder_;
+  std::vector<std::shared_ptr<Converter>> child_converters_;
+  // Indexed by type id; holds the child index, or -1 for undeclared ids.
+  std::vector<int8_t> type_id_to_child_num_;
+};
+
+// ------------------------------------------------------------------------
+// General conversion functions
+
+// Builds the NotImplemented status returned when no JSON converter exists
+// for `type`.
+Status ConversionNotImplemented(const std::shared_ptr<DataType>& type) {
+  const std::string type_name = type->ToString();
+  return Status::NotImplemented("JSON conversion to ", type_name, " not implemented");
+}
+
+// Creates and initializes a converter for the dictionary type `type`.
+//
+// The converter class is chosen from the dictionary's value type and is
+// instantiated with a DictionaryBuilder for that value type. Value types not
+// listed in the switch yield the status from ConversionNotImplemented(type).
+//
+// On success `*out` receives the converter after its Init() has succeeded;
+// `*out` is not written on failure.
+Status GetDictConverter(const std::shared_ptr<DataType>& type,
+ std::shared_ptr<Converter>* out) {
+ std::shared_ptr<Converter> res;
+
+ const auto value_type = checked_cast<const DictionaryType&>(*type).value_type();
+
+// Case for converter templates whose only template parameter is the builder.
+#define SIMPLE_CONVERTER_CASE(ID, CLASS, TYPE) \
+ case ID: \
+ res = std::make_shared<CLASS<DictionaryBuilder<TYPE>>>(type); \
+ break;
+
+// Case for converter templates parameterized on (value type, builder).
+#define PARAM_CONVERTER_CASE(ID, CLASS, TYPE) \
+ case ID: \
+ res = std::make_shared<CLASS<TYPE, DictionaryBuilder<TYPE>>>(type); \
+ break;
+
+ switch (value_type->id()) {
+ PARAM_CONVERTER_CASE(Type::INT8, IntegerConverter, Int8Type)
+ PARAM_CONVERTER_CASE(Type::INT16, IntegerConverter, Int16Type)
+ PARAM_CONVERTER_CASE(Type::INT32, IntegerConverter, Int32Type)
+ PARAM_CONVERTER_CASE(Type::INT64, IntegerConverter, Int64Type)
+ PARAM_CONVERTER_CASE(Type::UINT8, IntegerConverter, UInt8Type)
+ PARAM_CONVERTER_CASE(Type::UINT16, IntegerConverter, UInt16Type)
+ PARAM_CONVERTER_CASE(Type::UINT32, IntegerConverter, UInt32Type)
+ PARAM_CONVERTER_CASE(Type::UINT64, IntegerConverter, UInt64Type)
+ PARAM_CONVERTER_CASE(Type::FLOAT, FloatConverter, FloatType)
+ PARAM_CONVERTER_CASE(Type::DOUBLE, FloatConverter, DoubleType)
+ PARAM_CONVERTER_CASE(Type::STRING, StringConverter, StringType)
+ PARAM_CONVERTER_CASE(Type::BINARY, StringConverter, BinaryType)
+ PARAM_CONVERTER_CASE(Type::LARGE_STRING, StringConverter, LargeStringType)
+ PARAM_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter, LargeBinaryType)
+ SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter,
+ FixedSizeBinaryType)
+ SIMPLE_CONVERTER_CASE(Type::DECIMAL128, Decimal128Converter, Decimal128Type)
+ SIMPLE_CONVERTER_CASE(Type::DECIMAL256, Decimal256Converter, Decimal256Type)
+ default:
+ return ConversionNotImplemented(type);
+ }
+
+#undef SIMPLE_CONVERTER_CASE
+#undef PARAM_CONVERTER_CASE
+
+ // The converter is only published to the caller once Init() has succeeded.
+ RETURN_NOT_OK(res->Init());
+ *out = res;
+ return Status::OK();
+}
+
+// Creates and initializes the JSON converter for `type`.
+//
+// Dictionary types are delegated to GetDictConverter(). Every other type id
+// is mapped to a converter class by the switch below; ids not listed yield
+// the status from ConversionNotImplemented(type).
+//
+// On success `*out` receives the converter after its Init() has succeeded.
+Status GetConverter(const std::shared_ptr<DataType>& type,
+ std::shared_ptr<Converter>* out) {
+ if (type->id() == Type::DICTIONARY) {
+ return GetDictConverter(type, out);
+ }
+
+ std::shared_ptr<Converter> res;
+
+// One switch case: construct converter class CLASS for type id ID.
+#define SIMPLE_CONVERTER_CASE(ID, CLASS) \
+ case ID: \
+ res = std::make_shared<CLASS>(type); \
+ break;
+
+ // Note that the date, time, duration, month-interval and half-float types
+ // are all handled by IntegerConverter.
+ switch (type->id()) {
+ SIMPLE_CONVERTER_CASE(Type::INT8, IntegerConverter<Int8Type>)
+ SIMPLE_CONVERTER_CASE(Type::INT16, IntegerConverter<Int16Type>)
+ SIMPLE_CONVERTER_CASE(Type::INT32, IntegerConverter<Int32Type>)
+ SIMPLE_CONVERTER_CASE(Type::INT64, IntegerConverter<Int64Type>)
+ SIMPLE_CONVERTER_CASE(Type::UINT8, IntegerConverter<UInt8Type>)
+ SIMPLE_CONVERTER_CASE(Type::UINT16, IntegerConverter<UInt16Type>)
+ SIMPLE_CONVERTER_CASE(Type::UINT32, IntegerConverter<UInt32Type>)
+ SIMPLE_CONVERTER_CASE(Type::UINT64, IntegerConverter<UInt64Type>)
+ SIMPLE_CONVERTER_CASE(Type::TIMESTAMP, TimestampConverter)
+ SIMPLE_CONVERTER_CASE(Type::DATE32, IntegerConverter<Date32Type>)
+ SIMPLE_CONVERTER_CASE(Type::DATE64, IntegerConverter<Date64Type>)
+ SIMPLE_CONVERTER_CASE(Type::TIME32, IntegerConverter<Time32Type>)
+ SIMPLE_CONVERTER_CASE(Type::TIME64, IntegerConverter<Time64Type>)
+ SIMPLE_CONVERTER_CASE(Type::DURATION, IntegerConverter<DurationType>)
+ SIMPLE_CONVERTER_CASE(Type::NA, NullConverter)
+ SIMPLE_CONVERTER_CASE(Type::BOOL, BooleanConverter)
+ SIMPLE_CONVERTER_CASE(Type::HALF_FLOAT, IntegerConverter<HalfFloatType>)
+ SIMPLE_CONVERTER_CASE(Type::FLOAT, FloatConverter<FloatType>)
+ SIMPLE_CONVERTER_CASE(Type::DOUBLE, FloatConverter<DoubleType>)
+ SIMPLE_CONVERTER_CASE(Type::LIST, ListConverter<ListType>)
+ SIMPLE_CONVERTER_CASE(Type::LARGE_LIST, ListConverter<LargeListType>)
+ SIMPLE_CONVERTER_CASE(Type::MAP, MapConverter)
+ SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_LIST, FixedSizeListConverter)
+ SIMPLE_CONVERTER_CASE(Type::STRUCT, StructConverter)
+ SIMPLE_CONVERTER_CASE(Type::STRING, StringConverter<StringType>)
+ SIMPLE_CONVERTER_CASE(Type::BINARY, StringConverter<BinaryType>)
+ SIMPLE_CONVERTER_CASE(Type::LARGE_STRING, StringConverter<LargeStringType>)
+ SIMPLE_CONVERTER_CASE(Type::LARGE_BINARY, StringConverter<LargeBinaryType>)
+ SIMPLE_CONVERTER_CASE(Type::FIXED_SIZE_BINARY, FixedSizeBinaryConverter<>)
+ SIMPLE_CONVERTER_CASE(Type::DECIMAL128, Decimal128Converter<>)
+ SIMPLE_CONVERTER_CASE(Type::DECIMAL256, Decimal256Converter<>)
+ SIMPLE_CONVERTER_CASE(Type::SPARSE_UNION, UnionConverter)
+ SIMPLE_CONVERTER_CASE(Type::DENSE_UNION, UnionConverter)
+ SIMPLE_CONVERTER_CASE(Type::INTERVAL_MONTHS, IntegerConverter<MonthIntervalType>)
+ SIMPLE_CONVERTER_CASE(Type::INTERVAL_DAY_TIME, DayTimeIntervalConverter)
+ default:
+ return ConversionNotImplemented(type);
+ }
+
+#undef SIMPLE_CONVERTER_CASE
+
+ // The converter is only published to the caller once Init() has succeeded.
+ RETURN_NOT_OK(res->Init());
+ *out = res;
+ return Status::OK();
+}
+
+} // namespace
+
+// Parses `json_string` and converts the resulting document into an array of
+// `type`, stored in `*out`.
+//
+// The converter is looked up before the text is parsed, so an unsupported
+// type is reported ahead of any JSON parse error.
+Status ArrayFromJSON(const std::shared_ptr<DataType>& type, util::string_view json_string,
+                     std::shared_ptr<Array>* out) {
+  std::shared_ptr<Converter> converter;
+  RETURN_NOT_OK(GetConverter(type, &converter));
+
+  rj::Document json_doc;
+  const bool parse_failed =
+      json_doc.Parse<kParseFlags>(json_string.data(), json_string.length())
+          .HasParseError();
+  if (parse_failed) {
+    return Status::Invalid("JSON parse error at offset ", json_doc.GetErrorOffset(), ": ",
+                           GetParseError_En(json_doc.GetParseError()));
+  }
+
+  // The document root is handed to AppendValues as the sequence of values.
+  RETURN_NOT_OK(converter->AppendValues(json_doc));
+  return converter->Finish(out);
+}
+
+// Overload taking a std::string; forwards to the string_view overload.
+Status ArrayFromJSON(const std::shared_ptr<DataType>& type,
+                     const std::string& json_string, std::shared_ptr<Array>* out) {
+  const util::string_view json_view(json_string);
+  return ArrayFromJSON(type, json_view, out);
+}
+
+// Overload taking a C string; forwards to the string_view overload.
+Status ArrayFromJSON(const std::shared_ptr<DataType>& type, const char* json_string,
+                     std::shared_ptr<Array>* out) {
+  const util::string_view json_view(json_string);
+  return ArrayFromJSON(type, json_view, out);
+}
+
+// Builds a dictionary array of `type` from two JSON texts: one for the
+// indices (parsed with the dictionary's index type) and one for the
+// dictionary values (parsed with its value type).
+//
+// Returns TypeError if `type` is not a dictionary type.
+Status DictArrayFromJSON(const std::shared_ptr<DataType>& type,
+                         util::string_view indices_json,
+                         util::string_view dictionary_json, std::shared_ptr<Array>* out) {
+  if (type->id() != Type::DICTIONARY) {
+    return Status::TypeError("DictArrayFromJSON requires dictionary type, got ", *type);
+  }
+  const auto& dict_type = checked_cast<const DictionaryType&>(*type);
+
+  std::shared_ptr<Array> indices;
+  RETURN_NOT_OK(ArrayFromJSON(dict_type.index_type(), indices_json, &indices));
+
+  std::shared_ptr<Array> dictionary;
+  RETURN_NOT_OK(ArrayFromJSON(dict_type.value_type(), dictionary_json, &dictionary));
+
+  auto maybe_array =
+      DictionaryArray::FromArrays(type, std::move(indices), std::move(dictionary));
+  return std::move(maybe_array).Value(out);
+}
+
+// Parses `json_string` as a single JSON value of `type` and stores the
+// corresponding scalar in `*out`.
+//
+// The value is appended to a converter, finished into an array, and the
+// array's first element is extracted as the scalar.
+Status ScalarFromJSON(const std::shared_ptr<DataType>& type,
+                      util::string_view json_string, std::shared_ptr<Scalar>* out) {
+  std::shared_ptr<Converter> converter;
+  RETURN_NOT_OK(GetConverter(type, &converter));
+
+  rj::Document json_doc;
+  const bool parse_failed =
+      json_doc.Parse<kParseFlags>(json_string.data(), json_string.length())
+          .HasParseError();
+  if (parse_failed) {
+    return Status::Invalid("JSON parse error at offset ", json_doc.GetErrorOffset(), ": ",
+                           GetParseError_En(json_doc.GetParseError()));
+  }
+
+  RETURN_NOT_OK(converter->AppendValue(json_doc));
+  std::shared_ptr<Array> single_value_array;
+  RETURN_NOT_OK(converter->Finish(&single_value_array));
+  DCHECK_EQ(single_value_array->length(), 1);
+  ARROW_ASSIGN_OR_RAISE(*out, single_value_array->GetScalar(0));
+  return Status::OK();
+}
+
+} // namespace json
+} // namespace internal
+} // namespace ipc
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/chunked_builder.cc b/contrib/libs/apache/arrow/cpp/src/arrow/json/chunked_builder.cc
new file mode 100644
index 0000000000..040009c764
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/chunked_builder.cc
@@ -0,0 +1,469 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/json/chunked_builder.h"
+
+#include <mutex>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/json/converter.h"
+#include "arrow/table.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/task_group.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+using internal::TaskGroup;
+
+namespace json {
+
+// Shared base for chunked builders whose chunks are produced by a single
+// Converter. Holds the converted chunks, the mutex used by subclasses to
+// guard them, and the converter.
+class NonNestedChunkedArrayBuilder : public ChunkedArrayBuilder {
+ public:
+  NonNestedChunkedArrayBuilder(const std::shared_ptr<TaskGroup>& task_group,
+                               std::shared_ptr<Converter> converter)
+      : ChunkedArrayBuilder(task_group), converter_(std::move(converter)) {}
+
+  // Waits for the task group, then hands all chunks over to the resulting
+  // ChunkedArray; chunks_ is empty afterwards.
+  Status Finish(std::shared_ptr<ChunkedArray>* out) override {
+    RETURN_NOT_OK(task_group_->Finish());
+    ArrayVector finished_chunks;
+    finished_chunks.swap(chunks_);
+    *out = std::make_shared<ChunkedArray>(std::move(finished_chunks),
+                                          converter_->out_type());
+    return Status::OK();
+  }
+
+  // Finishes the current task group before switching to the new one.
+  Status ReplaceTaskGroup(const std::shared_ptr<TaskGroup>& task_group) override {
+    RETURN_NOT_OK(task_group_->Finish());
+    task_group_ = task_group;
+    return Status::OK();
+  }
+
+ protected:
+  ArrayVector chunks_;
+  std::mutex mutex_;
+  std::shared_ptr<Converter> converter_;
+};
+
+// Chunked builder whose converter is fixed for its lifetime: each inserted
+// block is converted in a task and stored at its block index.
+class TypedChunkedArrayBuilder
+    : public NonNestedChunkedArrayBuilder,
+      public std::enable_shared_from_this<TypedChunkedArrayBuilder> {
+ public:
+  using NonNestedChunkedArrayBuilder::NonNestedChunkedArrayBuilder;
+
+  void Insert(int64_t block_index, const std::shared_ptr<Field>&,
+              const std::shared_ptr<Array>& unconverted) override {
+    const auto slot = static_cast<size_t>(block_index);
+    {
+      // Make room for this block's slot; the lock is released before the
+      // task is scheduled.
+      std::lock_guard<std::mutex> guard(mutex_);
+      if (slot >= chunks_.size()) {
+        chunks_.resize(slot + 1, nullptr);
+      }
+    }
+
+    // The task holds a shared_ptr to this builder for as long as it exists.
+    auto self = shared_from_this();
+    task_group_->Append([self, slot, unconverted] {
+      std::shared_ptr<Array> converted;
+      RETURN_NOT_OK(self->converter_->Convert(unconverted, &converted));
+      std::lock_guard<std::mutex> guard(self->mutex_);
+      self->chunks_[slot] = std::move(converted);
+      return Status::OK();
+    });
+  }
+};
+
+// Chunked builder that can change its converter while running: when a block
+// fails to convert, the PromotionGraph is asked for a promoted type, the
+// converter is replaced, and already-converted blocks are scheduled again.
+//
+// The unconverted arrays and fields are retained per block so that blocks
+// can be reconverted after a promotion.
+class InferringChunkedArrayBuilder
+    : public NonNestedChunkedArrayBuilder,
+      public std::enable_shared_from_this<InferringChunkedArrayBuilder> {
+ public:
+  InferringChunkedArrayBuilder(const std::shared_ptr<TaskGroup>& task_group,
+                               const PromotionGraph* promotion_graph,
+                               std::shared_ptr<Converter> converter)
+      : NonNestedChunkedArrayBuilder(task_group, std::move(converter)),
+        promotion_graph_(promotion_graph) {}
+
+  // Records the unconverted block (growing the per-block vectors if needed)
+  // and schedules its conversion.
+  void Insert(int64_t block_index, const std::shared_ptr<Field>& unconverted_field,
+              const std::shared_ptr<Array>& unconverted) override {
+    std::unique_lock<std::mutex> lock(mutex_);
+    if (chunks_.size() <= static_cast<size_t>(block_index)) {
+      chunks_.resize(static_cast<size_t>(block_index) + 1, nullptr);
+      unconverted_.resize(chunks_.size(), nullptr);
+      unconverted_fields_.resize(chunks_.size(), nullptr);
+    }
+    unconverted_[block_index] = unconverted;
+    unconverted_fields_[block_index] = unconverted_field;
+    lock.unlock();
+    ScheduleConvertChunk(block_index);
+  }
+
+  // Appends a task to the task group that tries to convert the given block.
+  void ScheduleConvertChunk(int64_t block_index) {
+    auto self = shared_from_this();
+    task_group_->Append([self, block_index] {
+      return self->TryConvertChunk(static_cast<size_t>(block_index));
+    });
+  }
+
+  // Converts one block with the current converter. The conversion itself
+  // runs without the lock held; the result is only kept if the converter was
+  // not replaced in the meantime.
+  Status TryConvertChunk(size_t block_index) {
+    std::unique_lock<std::mutex> lock(mutex_);
+    auto converter = converter_;
+    auto unconverted = unconverted_[block_index];
+    auto unconverted_field = unconverted_fields_[block_index];
+    std::shared_ptr<Array> converted;
+
+    lock.unlock();
+    Status st = converter->Convert(unconverted, &converted);
+    lock.lock();
+
+    if (converter != converter_) {
+      // another task promoted converter; reconvert
+      lock.unlock();
+      ScheduleConvertChunk(static_cast<int64_t>(block_index));
+      return Status::OK();
+    }
+
+    if (st.ok()) {
+      // conversion succeeded
+      chunks_[block_index] = std::move(converted);
+      return Status::OK();
+    }
+
+    auto promoted_type =
+        promotion_graph_->Promote(converter_->out_type(), unconverted_field);
+    if (promoted_type == nullptr) {
+      // converter failed, no promotion available
+      return st;
+    }
+    RETURN_NOT_OK(MakeConverter(promoted_type, converter_->pool(), &converter_));
+
+    size_t nchunks = chunks_.size();
+    for (size_t i = 0; i < nchunks; ++i) {
+      if (i != block_index && chunks_[i]) {
+        // We're assuming the chunk was converted using the wrong type
+        // (which should be true unless the executor reorders tasks)
+        chunks_[i].reset();
+        // The lock is dropped around the scheduling call and re-taken after.
+        lock.unlock();
+        ScheduleConvertChunk(static_cast<int64_t>(i));
+        lock.lock();
+      }
+    }
+    lock.unlock();
+    ScheduleConvertChunk(static_cast<int64_t>(block_index));
+    return Status::OK();
+  }
+
+  // Finishes the base builder, then drops all per-block bookkeeping.
+  Status Finish(std::shared_ptr<ChunkedArray>* out) override {
+    RETURN_NOT_OK(NonNestedChunkedArrayBuilder::Finish(out));
+    unconverted_.clear();
+    // Cleared alongside unconverted_ so the three per-block vectors stay the
+    // same size and no stale Field pointers are retained after Finish().
+    unconverted_fields_.clear();
+    return Status::OK();
+  }
+
+ private:
+  ArrayVector unconverted_;
+  std::vector<std::shared_ptr<Field>> unconverted_fields_;
+  const PromotionGraph* promotion_graph_;
+};
+
+// Chunked builder for list columns. For every block it keeps the validity
+// bitmap and the offsets buffer of the unconverted list array and forwards
+// the list's values to a nested value builder. Finish() reassembles one
+// ListArray per block from those pieces.
+class ChunkedListArrayBuilder : public ChunkedArrayBuilder {
+ public:
+  ChunkedListArrayBuilder(const std::shared_ptr<TaskGroup>& task_group, MemoryPool* pool,
+                          std::shared_ptr<ChunkedArrayBuilder> value_builder,
+                          const std::shared_ptr<Field>& value_field)
+      : ChunkedArrayBuilder(task_group),
+        pool_(pool),
+        value_builder_(std::move(value_builder)),
+        value_field_(value_field) {}
+
+  // Finishes the current task group, then switches this builder and the
+  // value builder to the new one.
+  Status ReplaceTaskGroup(const std::shared_ptr<TaskGroup>& task_group) override {
+    RETURN_NOT_OK(task_group_->Finish());
+    RETURN_NOT_OK(value_builder_->ReplaceTaskGroup(task_group));
+    task_group_ = task_group;
+    return Status::OK();
+  }
+
+  // Records a block. `unconverted` is either a null-typed array (Type::NA),
+  // handled by InsertNull(), or a ListArray. Errors cannot be returned from
+  // here, so a failure is reported by appending a failing task to the group.
+  void Insert(int64_t block_index, const std::shared_ptr<Field>&,
+              const std::shared_ptr<Array>& unconverted) override {
+    std::unique_lock<std::mutex> lock(mutex_);
+
+    // Grow the per-block vectors before either branch below indexes them;
+    // InsertNull() writes to both vectors at block_index.
+    if (null_bitmap_chunks_.size() <= static_cast<size_t>(block_index)) {
+      null_bitmap_chunks_.resize(static_cast<size_t>(block_index) + 1, nullptr);
+      offset_chunks_.resize(null_bitmap_chunks_.size(), nullptr);
+    }
+
+    if (unconverted->type_id() == Type::NA) {
+      auto st = InsertNull(block_index, unconverted->length());
+      if (!st.ok()) {
+        task_group_->Append([st] { return st; });
+      }
+      return;
+    }
+
+    DCHECK_EQ(unconverted->type_id(), Type::LIST);
+    const auto& list_array = checked_cast<const ListArray&>(*unconverted);
+
+    null_bitmap_chunks_[block_index] = unconverted->null_bitmap();
+    offset_chunks_[block_index] = list_array.value_offsets();
+
+    value_builder_->Insert(block_index, list_array.list_type()->value_field(),
+                           list_array.values());
+  }
+
+  // Finishes the task group and the value builder, then builds one ListArray
+  // per block from the stored offsets, the finished values chunk and the
+  // stored validity bitmap.
+  Status Finish(std::shared_ptr<ChunkedArray>* out) override {
+    RETURN_NOT_OK(task_group_->Finish());
+
+    std::shared_ptr<ChunkedArray> value_array;
+    RETURN_NOT_OK(value_builder_->Finish(&value_array));
+
+    auto type = list(value_field_->WithType(value_array->type())->WithMetadata(nullptr));
+    ArrayVector chunks(null_bitmap_chunks_.size());
+    for (size_t i = 0; i < null_bitmap_chunks_.size(); ++i) {
+      auto value_chunk = value_array->chunk(static_cast<int>(i));
+      // The offsets buffer holds one more int32 entry than there are lists.
+      auto length = offset_chunks_[i]->size() / sizeof(int32_t) - 1;
+      chunks[i] = std::make_shared<ListArray>(type, length, offset_chunks_[i],
+                                              value_chunk, null_bitmap_chunks_[i]);
+    }
+
+    *out = std::make_shared<ChunkedArray>(std::move(chunks), type);
+    return Status::OK();
+  }
+
+ private:
+  // call from Insert() only, with mutex_ locked and the per-block vectors
+  // already sized to include block_index
+  //
+  // Stores an all-null block of `length` lists: an empty bitmap, zeroed
+  // offsets, and a zero-length NullArray as the block's values.
+  Status InsertNull(int64_t block_index, int64_t length) {
+    value_builder_->Insert(block_index, value_field_, std::make_shared<NullArray>(0));
+
+    ARROW_ASSIGN_OR_RAISE(null_bitmap_chunks_[block_index],
+                          AllocateEmptyBitmap(length, pool_));
+
+    int64_t offsets_length = (length + 1) * sizeof(int32_t);
+    ARROW_ASSIGN_OR_RAISE(offset_chunks_[block_index],
+                          AllocateBuffer(offsets_length, pool_));
+    std::memset(offset_chunks_[block_index]->mutable_data(), 0, offsets_length);
+
+    return Status::OK();
+  }
+
+  std::mutex mutex_;
+  MemoryPool* pool_;
+  std::shared_ptr<ChunkedArrayBuilder> value_builder_;
+  BufferVector offset_chunks_, null_bitmap_chunks_;
+  std::shared_ptr<Field> value_field_;
+};
+
+// Chunked builder for struct columns. Holds one child ChunkedArrayBuilder per
+// field plus, for every block, the block's validity bitmap and length.
+//
+// When promotion_graph_ is non-null, blocks may contain fields in any order
+// and previously unseen fields: children are matched by name and new child
+// builders are created on demand (see InsertChildren). When it is null,
+// children are matched by position.
+class ChunkedStructArrayBuilder : public ChunkedArrayBuilder {
+ public:
+ // Takes (name, builder) pairs; each name is assigned an index equal to its
+ // position, which is also the builder's position in child_builders_.
+ ChunkedStructArrayBuilder(
+ const std::shared_ptr<TaskGroup>& task_group, MemoryPool* pool,
+ const PromotionGraph* promotion_graph,
+ std::vector<std::pair<std::string, std::shared_ptr<ChunkedArrayBuilder>>>
+ name_builders)
+ : ChunkedArrayBuilder(task_group), pool_(pool), promotion_graph_(promotion_graph) {
+ for (auto&& name_builder : name_builders) {
+ auto index = static_cast<int>(name_to_index_.size());
+ name_to_index_.emplace(std::move(name_builder.first), index);
+ child_builders_.emplace_back(std::move(name_builder.second));
+ }
+ }
+
+ // Records a block. `unconverted` is either a null-typed array (Type::NA) or
+ // a StructArray. Errors cannot be returned from here, so a failure is
+ // reported by appending a failing task to the task group.
+ void Insert(int64_t block_index, const std::shared_ptr<Field>&,
+ const std::shared_ptr<Array>& unconverted) override {
+ std::unique_lock<std::mutex> lock(mutex_);
+
+ // Grow the per-block vectors so that block_index is a valid slot.
+ if (null_bitmap_chunks_.size() <= static_cast<size_t>(block_index)) {
+ null_bitmap_chunks_.resize(static_cast<size_t>(block_index) + 1, nullptr);
+ chunk_lengths_.resize(null_bitmap_chunks_.size(), -1);
+ child_absent_.resize(null_bitmap_chunks_.size(), std::vector<bool>(0));
+ }
+ null_bitmap_chunks_[block_index] = unconverted->null_bitmap();
+ chunk_lengths_[block_index] = unconverted->length();
+
+ if (unconverted->type_id() == Type::NA) {
+ // A null-typed block: replace the bitmap with a freshly allocated one
+ // whose bytes are all zero.
+ auto maybe_buffer = AllocateBitmap(unconverted->length(), pool_);
+ if (maybe_buffer.ok()) {
+ null_bitmap_chunks_[block_index] = *std::move(maybe_buffer);
+ std::memset(null_bitmap_chunks_[block_index]->mutable_data(), 0,
+ null_bitmap_chunks_[block_index]->size());
+ } else {
+ Status st = maybe_buffer.status();
+ task_group_->Append([st] { return st; });
+ }
+
+ // absent fields will be inserted at Finish
+ return;
+ }
+
+ const auto& struct_array = checked_cast<const StructArray&>(*unconverted);
+ if (promotion_graph_ == nullptr) {
+ // If unexpected fields are ignored or result in an error then all parsers will emit
+ // columns exclusively in the ordering specified in ParseOptions::explicit_schema,
+ // so child_builders_ is immutable and no associative lookup is necessary.
+ for (int i = 0; i < unconverted->num_fields(); ++i) {
+ child_builders_[i]->Insert(block_index, unconverted->type()->field(i),
+ struct_array.field(i));
+ }
+ } else {
+ auto st = InsertChildren(block_index, struct_array);
+ if (!st.ok()) {
+ return task_group_->Append([st] { return st; });
+ }
+ }
+ }
+
+ // Finishes the task group and every child builder, then assembles one
+ // StructArray per block from the child chunks, the recorded length and the
+ // recorded validity bitmap.
+ Status Finish(std::shared_ptr<ChunkedArray>* out) override {
+ RETURN_NOT_OK(task_group_->Finish());
+
+ if (promotion_graph_ != nullptr) {
+ // insert absent child chunks
+ for (auto&& name_index : name_to_index_) {
+ auto child_builder = child_builders_[name_index.second].get();
+
+ // The fill-in inserts below run on a fresh serial task group.
+ RETURN_NOT_OK(child_builder->ReplaceTaskGroup(TaskGroup::MakeSerial()));
+
+ for (size_t i = 0; i < chunk_lengths_.size(); ++i) {
+ // Skip blocks in which this child was recorded as present.
+ if (child_absent_[i].size() > static_cast<size_t>(name_index.second) &&
+ !child_absent_[i][name_index.second]) {
+ continue;
+ }
+ // Otherwise give the child a NullArray of the block's length.
+ auto empty = std::make_shared<NullArray>(chunk_lengths_[i]);
+ child_builder->Insert(i, promotion_graph_->Null(name_index.first), empty);
+ }
+ }
+ }
+
+ // Finish every child; fields are placed at the index assigned to their name.
+ std::vector<std::shared_ptr<Field>> fields(name_to_index_.size());
+ std::vector<std::shared_ptr<ChunkedArray>> child_arrays(name_to_index_.size());
+ for (auto&& name_index : name_to_index_) {
+ auto child_builder = child_builders_[name_index.second].get();
+
+ std::shared_ptr<ChunkedArray> child_array;
+ RETURN_NOT_OK(child_builder->Finish(&child_array));
+
+ child_arrays[name_index.second] = child_array;
+ fields[name_index.second] = field(name_index.first, child_array->type());
+ }
+
+ auto type = struct_(std::move(fields));
+ ArrayVector chunks(null_bitmap_chunks_.size());
+ for (size_t i = 0; i < null_bitmap_chunks_.size(); ++i) {
+ ArrayVector child_chunks;
+ for (const auto& child_array : child_arrays) {
+ child_chunks.push_back(child_array->chunk(static_cast<int>(i)));
+ }
+ chunks[i] = std::make_shared<StructArray>(type, chunk_lengths_[i], child_chunks,
+ null_bitmap_chunks_[i]);
+ }
+
+ *out = std::make_shared<ChunkedArray>(std::move(chunks), type);
+ return Status::OK();
+ }
+
+ // Finishes the current task group, then switches this builder and all
+ // child builders to the new one.
+ Status ReplaceTaskGroup(const std::shared_ptr<TaskGroup>& task_group) override {
+ RETURN_NOT_OK(task_group_->Finish());
+ for (auto&& child_builder : child_builders_) {
+ RETURN_NOT_OK(child_builder->ReplaceTaskGroup(task_group));
+ }
+ task_group_ = task_group;
+ return Status::OK();
+ }
+
+ private:
+ // Insert children associatively by name; the unconverted block may have unexpected or
+ // differently ordered fields
+ // call from Insert() only, with mutex_ locked
+ Status InsertChildren(int64_t block_index, const StructArray& unconverted) {
+ const auto& fields = unconverted.type()->fields();
+
+ for (int i = 0; i < unconverted.num_fields(); ++i) {
+ auto it = name_to_index_.find(fields[i]->name());
+
+ if (it == name_to_index_.end()) {
+ // add a new field to this builder
+ auto type = promotion_graph_->Infer(fields[i]);
+ DCHECK_NE(type, nullptr)
+ << "invalid unconverted_field encountered in conversion: "
+ << fields[i]->name() << ":" << *fields[i]->type();
+
+ auto new_index = static_cast<int>(name_to_index_.size());
+ it = name_to_index_.emplace(fields[i]->name(), new_index).first;
+
+ std::shared_ptr<ChunkedArrayBuilder> child_builder;
+ RETURN_NOT_OK(MakeChunkedArrayBuilder(task_group_, pool_, promotion_graph_, type,
+ &child_builder));
+ child_builders_.emplace_back(std::move(child_builder));
+ }
+
+ auto unconverted_field = unconverted.type()->field(i);
+ child_builders_[it->second]->Insert(block_index, unconverted_field,
+ unconverted.field(i));
+
+ // Entries added by the resize default to "absent"; then mark this child
+ // as present in this block.
+ child_absent_[block_index].resize(child_builders_.size(), true);
+ child_absent_[block_index][it->second] = false;
+ }
+
+ return Status::OK();
+ }
+
+ std::mutex mutex_;
+ MemoryPool* pool_;
+ const PromotionGraph* promotion_graph_;
+ // Field name -> index into child_builders_.
+ std::unordered_map<std::string, int> name_to_index_;
+ std::vector<std::shared_ptr<ChunkedArrayBuilder>> child_builders_;
+ // Per block, per child index: whether the child is absent from that block.
+ std::vector<std::vector<bool>> child_absent_;
+ // Per block: validity bitmap of the struct array.
+ BufferVector null_bitmap_chunks_;
+ // Per block: number of rows (-1 until the block has been inserted).
+ std::vector<int64_t> chunk_lengths_;
+};
+
+// Creates the chunked builder matching `type` and stores it in `*out`.
+//
+// Struct and list types get nested builders, created recursively for their
+// children. Any other type gets a converter-backed builder: an inferring one
+// when a promotion graph is supplied, a fixed-type one otherwise.
+Status MakeChunkedArrayBuilder(const std::shared_ptr<TaskGroup>& task_group,
+                               MemoryPool* pool, const PromotionGraph* promotion_graph,
+                               const std::shared_ptr<DataType>& type,
+                               std::shared_ptr<ChunkedArrayBuilder>* out) {
+  switch (type->id()) {
+    case Type::STRUCT: {
+      std::vector<std::pair<std::string, std::shared_ptr<ChunkedArrayBuilder>>>
+          named_children;
+      for (const auto& child_field : type->fields()) {
+        std::shared_ptr<ChunkedArrayBuilder> child;
+        RETURN_NOT_OK(MakeChunkedArrayBuilder(task_group, pool, promotion_graph,
+                                              child_field->type(), &child));
+        named_children.emplace_back(child_field->name(), std::move(child));
+      }
+      *out = std::make_shared<ChunkedStructArrayBuilder>(
+          task_group, pool, promotion_graph, std::move(named_children));
+      return Status::OK();
+    }
+    case Type::LIST: {
+      const auto& list_type = checked_cast<const ListType&>(*type);
+      std::shared_ptr<ChunkedArrayBuilder> values;
+      RETURN_NOT_OK(MakeChunkedArrayBuilder(task_group, pool, promotion_graph,
+                                            list_type.value_type(), &values));
+      *out = std::make_shared<ChunkedListArrayBuilder>(
+          task_group, pool, std::move(values), list_type.value_field());
+      return Status::OK();
+    }
+    default:
+      break;
+  }
+
+  // Non-nested type: wrap a converter.
+  std::shared_ptr<Converter> converter;
+  RETURN_NOT_OK(MakeConverter(type, pool, &converter));
+  if (promotion_graph != nullptr) {
+    *out = std::make_shared<InferringChunkedArrayBuilder>(task_group, promotion_graph,
+                                                          std::move(converter));
+  } else {
+    *out = std::make_shared<TypedChunkedArrayBuilder>(task_group, std::move(converter));
+  }
+  return Status::OK();
+}
+
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/chunked_builder.h b/contrib/libs/apache/arrow/cpp/src/arrow/json/chunked_builder.h
new file mode 100644
index 0000000000..93b327bf3a
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/chunked_builder.h
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/type_fwd.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace json {
+
+class PromotionGraph;
+
+/// Abstract interface for builders which receive unconverted JSON blocks one at a
+/// time (Insert) and assemble the converted results into a ChunkedArray (Finish).
+///
+/// The task group passed to the constructor is stored in task_group_ for use by
+/// subclasses.
+class ARROW_EXPORT ChunkedArrayBuilder {
+ public:
+ virtual ~ChunkedArrayBuilder() = default;
+
+ /// Spawn a task that will try to convert and insert the given JSON block
+ ///
+ /// \param block_index index identifying the block (presumably its position in
+ /// the input - confirm against callers)
+ /// \param unconverted_field field describing the unconverted array
+ /// \param unconverted the array to convert
+ virtual void Insert(int64_t block_index,
+ const std::shared_ptr<Field>& unconverted_field,
+ const std::shared_ptr<Array>& unconverted) = 0;
+
+ /// Return the final chunked array.
+ /// Every chunk must be inserted before this is called!
+ ///
+ /// \param[out] out receives the finished chunked array
+ virtual Status Finish(std::shared_ptr<ChunkedArray>* out) = 0;
+
+ /// Finish current task group and substitute a new one
+ virtual Status ReplaceTaskGroup(
+ const std::shared_ptr<arrow::internal::TaskGroup>& task_group) = 0;
+
+ protected:
+ // Only subclasses may construct; the task group is kept in task_group_.
+ explicit ChunkedArrayBuilder(
+ const std::shared_ptr<arrow::internal::TaskGroup>& task_group)
+ : task_group_(task_group) {}
+
+ // Task group made available to subclasses.
+ std::shared_ptr<arrow::internal::TaskGroup> task_group_;
+};
+
+/// Create a chunked builder for the given type.
+///
+/// Struct and list types produce a nested builder whose child builders are created
+/// recursively; every other type produces a builder wrapping a single Converter.
+///
+/// If unexpected fields and promotion need to be handled, promotion_graph must be
+/// non-null.
+///
+/// \param[out] out receives the new builder on success
+/// \return an error if a converter or child builder could not be created
+ARROW_EXPORT Status MakeChunkedArrayBuilder(
+ const std::shared_ptr<arrow::internal::TaskGroup>& task_group, MemoryPool* pool,
+ const PromotionGraph* promotion_graph, const std::shared_ptr<DataType>& type,
+ std::shared_ptr<ChunkedArrayBuilder>* out);
+
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/chunker.cc b/contrib/libs/apache/arrow/cpp/src/arrow/json/chunker.cc
new file mode 100644
index 0000000000..b4b4d31eb9
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/chunker.cc
@@ -0,0 +1,186 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/json/chunker.h"
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "arrow/json/rapidjson_defs.h"
+#include "rapidjson/reader.h"
+
+#include "arrow/buffer.h"
+#include "arrow/json/options.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/make_unique.h"
+#include "arrow/util/string_view.h"
+
+namespace arrow {
+
+using internal::make_unique;
+using util::string_view;
+
+namespace json {
+
+namespace rj = arrow::rapidjson;
+
+// Return how many leading characters of `view` are whitespace (space, tab, CR, LF).
+// If the whole view is whitespace, its full size is returned.
+static size_t ConsumeWhitespace(string_view view) {
+#ifdef RAPIDJSON_SIMD
+  const char* const begin = view.data();
+  const char* const first_nonws = rj::SkipWhitespace_SIMD(begin, begin + view.size());
+  return first_nonws - begin;
+#else
+  const auto first_nonws = view.find_first_not_of(" \t\r\n");
+  return first_nonws == string_view::npos ? view.size() : first_nonws;
+#endif
+}
+
+/// RapidJson custom stream for reading JSON stored in multiple buffers
+/// http://rapidjson.org/md_doc_stream.html#CustomStream
+///
+/// The pieces are stored in reverse order so that the piece currently being read is
+/// always strings_.back(). Empty pieces are discarded on construction: Peek() and
+/// Take() index element 0 of the current piece, which is only valid when that piece
+/// is non-empty.
+///
+/// This is a read-only stream; the output half of the RapidJson stream concept
+/// (Put, Flush, PutBegin, PutEnd) logs a fatal error.
+class MultiStringStream {
+ public:
+  using Ch = char;
+
+  /// Read the given views one after another, in the order given.
+  explicit MultiStringStream(std::vector<string_view> strings)
+      : strings_(std::move(strings)) {
+    DropEmptyAndReverse();
+  }
+
+  /// Read the contents of the given buffers one after another, in the order given.
+  explicit MultiStringStream(const BufferVector& buffers) : strings_(buffers.size()) {
+    for (size_t i = 0; i < buffers.size(); ++i) {
+      strings_[i] = string_view(*buffers[i]);
+    }
+    DropEmptyAndReverse();
+  }
+
+  /// Return the next character without consuming it, or '\0' at end of input.
+  char Peek() const {
+    if (strings_.empty()) return '\0';
+    return strings_.back()[0];
+  }
+
+  /// Consume and return the next character, or return '\0' at end of input.
+  char Take() {
+    if (strings_.empty()) return '\0';
+    char taken = strings_.back()[0];
+    if (strings_.back().size() == 1) {
+      // Current piece exhausted: move on to the next one.
+      strings_.pop_back();
+    } else {
+      strings_.back() = strings_.back().substr(1);
+    }
+    ++index_;
+    return taken;
+  }
+
+  /// Number of characters consumed so far through Take().
+  size_t Tell() { return index_; }
+
+  void Put(char) { ARROW_LOG(FATAL) << "not implemented"; }
+  void Flush() { ARROW_LOG(FATAL) << "not implemented"; }
+  char* PutBegin() {
+    ARROW_LOG(FATAL) << "not implemented";
+    return nullptr;
+  }
+  size_t PutEnd(char*) {
+    ARROW_LOG(FATAL) << "not implemented";
+    return 0;
+  }
+
+ private:
+  // Remove empty pieces (keeping every stored view non-empty, which Peek/Take rely
+  // on), then reverse so the first piece to read ends up at the back.
+  void DropEmptyAndReverse() {
+    strings_.erase(std::remove_if(strings_.begin(), strings_.end(),
+                                  [](string_view piece) { return piece.empty(); }),
+                   strings_.end());
+    std::reverse(strings_.begin(), strings_.end());
+  }
+
+  size_t index_ = 0;
+  std::vector<string_view> strings_;
+};
+
+// Parse a single JSON value from `stream`, discarding every parse event.
+//
+// Returns the stream position after the value on success, 0 if the document is
+// empty, and string_view::npos for any other parse error (i.e. the most recent
+// object was partial).
+template <typename Stream>
+static size_t ConsumeWholeObject(Stream&& stream) {
+  static constexpr unsigned parse_flags = rj::kParseIterativeFlag |
+                                          rj::kParseStopWhenDoneFlag |
+                                          rj::kParseNumbersAsStringsFlag;
+  rj::BaseReaderHandler<rj::UTF8<>> handler;
+  rj::Reader reader;
+  // parse a single JSON object
+  const auto code = reader.Parse<parse_flags>(stream, handler).Code();
+  if (code == rj::kParseErrorNone) {
+    return stream.Tell();
+  }
+  if (code == rj::kParseErrorDocumentEmpty) {
+    return 0;
+  }
+  // rapidjson emitted an error, the most recent object was partial
+  return string_view::npos;
+}
+
+namespace {
+
+// BoundaryFinder for JSON whose objects may contain raw newlines: instead of
+// scanning for newline characters it runs the JSON parser to find where each
+// object ends.
+class ParsingBoundaryFinder : public BoundaryFinder {
+ public:
+  // Find the end of the first object in the concatenation of `partial` and `block`.
+  // *out_pos is the offset of that end within `block`, or -1 if no complete object
+  // was found.
+  Status FindFirst(string_view partial, string_view block, int64_t* out_pos) override {
+    // NOTE: We could bubble up JSON parse errors here, but the actual parsing
+    // step will detect them later anyway.
+    const size_t consumed = ConsumeWholeObject(MultiStringStream({partial, block}));
+    if (consumed != string_view::npos) {
+      DCHECK_GE(consumed, partial.size());
+      DCHECK_LE(consumed, partial.size() + block.size());
+      *out_pos = static_cast<int64_t>(consumed - partial.size());
+      return Status::OK();
+    }
+    *out_pos = -1;
+    return Status::OK();
+  }
+
+  // Find the end of the last complete object in `block`, including any whitespace
+  // that follows it. *out_pos is -1 if `block` holds no complete object.
+  Status FindLast(util::string_view block, int64_t* out_pos) override {
+    using InputStream = rj::EncodedInputStream<rj::UTF8<>, rj::MemoryStream>;
+
+    const size_t total_size = block.size();
+    size_t consumed = 0;
+    while (consumed < total_size) {
+      rj::MemoryStream ms(reinterpret_cast<const char*>(block.data()), block.size());
+      const size_t object_size = ConsumeWholeObject(InputStream(ms));
+      if (object_size == 0 || object_size == string_view::npos) {
+        // found incomplete object or block is empty
+        break;
+      }
+      consumed += object_size;
+      block = block.substr(object_size);
+    }
+
+    if (consumed != 0) {
+      consumed += ConsumeWhitespace(block);
+      DCHECK_LE(consumed, total_size);
+      *out_pos = static_cast<int64_t>(consumed);
+      return Status::OK();
+    }
+    *out_pos = -1;
+    return Status::OK();
+  }
+
+  // Not supported by this finder.
+  Status FindNth(util::string_view partial, util::string_view block, int64_t count,
+                 int64_t* out_pos, int64_t* num_found) override {
+    return Status::NotImplemented("ParsingBoundaryFinder::FindNth");
+  }
+};
+
+} // namespace
+
+// Create a Chunker whose boundary finder matches the parse options: a parsing
+// finder when values may contain newlines, a newline-based finder otherwise.
+std::unique_ptr<Chunker> MakeChunker(const ParseOptions& options) {
+  std::shared_ptr<BoundaryFinder> finder;
+  if (!options.newlines_in_values) {
+    finder = MakeNewlineBoundaryFinder();
+  } else {
+    finder = std::make_shared<ParsingBoundaryFinder>();
+  }
+  return std::unique_ptr<Chunker>(new Chunker(std::move(finder)));
+}
+
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/chunker.h b/contrib/libs/apache/arrow/cpp/src/arrow/json/chunker.h
new file mode 100644
index 0000000000..9ed85126da
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/chunker.h
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "arrow/util/delimiting.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace json {
+
+struct ParseOptions;
+
+/// Create a Chunker for splitting JSON input into blocks.
+///
+/// When options.newlines_in_values is set, object boundaries are located by parsing
+/// the JSON; otherwise a newline-based boundary finder is used.
+ARROW_EXPORT
+std::unique_ptr<Chunker> MakeChunker(const ParseOptions& options);
+
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/converter.cc b/contrib/libs/apache/arrow/cpp/src/arrow/json/converter.cc
new file mode 100644
index 0000000000..fe9500d40c
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/converter.cc
@@ -0,0 +1,323 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/json/converter.h"
+
+#include <memory>
+#include <utility>
+
+#include "arrow/array.h"
+#include "arrow/array/builder_binary.h"
+#include "arrow/array/builder_primitive.h"
+#include "arrow/array/builder_time.h"
+#include "arrow/json/parser.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/value_parsing.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+using util::string_view;
+
+namespace json {
+
+// Build a Status::Invalid describing a failed conversion to `type`. The message is
+// "Failed of conversion of JSON to ", then `type`, then every extra argument in the
+// order given.
+template <typename... Args>
+Status GenericConversionError(const DataType& type, Args&&... args) {
+ return Status::Invalid("Failed of conversion of JSON to ", type,
+ std::forward<Args>(args)...);
+}
+
+namespace {
+
+// View `in` as a DictionaryArray.
+//
+// The DCHECKs assert the shape the converters below rely on: a dictionary type with
+// int32 indices and string values. The returned reference points into `in`, so it is
+// only usable while `in` keeps the array alive.
+const DictionaryArray& GetDictionaryArray(const std::shared_ptr<Array>& in) {
+ DCHECK_EQ(in->type_id(), Type::DICTIONARY);
+ auto dict_type = checked_cast<const DictionaryType*>(in->type().get());
+ DCHECK_EQ(dict_type->index_type()->id(), Type::INT32);
+ DCHECK_EQ(dict_type->value_type()->id(), Type::STRING);
+ return checked_cast<const DictionaryArray&>(*in);
+}
+
+// Walk the indices of `dict_array` in order. For each valid index, call
+// `visit_valid` with the dictionary string it refers to; for each null index, call
+// `visit_null`. Stops at, and returns, the first non-OK Status from either visitor.
+template <typename ValidVisitor, typename NullVisitor>
+Status VisitDictionaryEntries(const DictionaryArray& dict_array,
+                              ValidVisitor&& visit_valid, NullVisitor&& visit_null) {
+  const auto& values = checked_cast<const StringArray&>(*dict_array.dictionary());
+  const auto& positions = checked_cast<const Int32Array&>(*dict_array.indices());
+  for (int64_t row = 0; row < positions.length(); ++row) {
+    if (!positions.IsValid(row)) {
+      RETURN_NOT_OK(visit_null());
+      continue;
+    }
+    RETURN_NOT_OK(visit_valid(values.GetView(positions.GetView(row))));
+  }
+  return Status::OK();
+}
+
+} // namespace
+
+// Base class for converters which accept and output non-nested types.
+// It adds no behavior of its own: the constructor simply forwards the memory pool
+// and output type to Converter.
+class PrimitiveConverter : public Converter {
+ public:
+ PrimitiveConverter(MemoryPool* pool, std::shared_ptr<DataType> out_type)
+ : Converter(pool, out_type) {}
+};
+
+// Converter to the null type: a null-typed input array is passed through unchanged,
+// anything else is a conversion error.
+class NullConverter : public PrimitiveConverter {
+ public:
+  using PrimitiveConverter::PrimitiveConverter;
+
+  Status Convert(const std::shared_ptr<Array>& in, std::shared_ptr<Array>* out) override {
+    if (in->type_id() == Type::NA) {
+      *out = in;
+      return Status::OK();
+    }
+    return GenericConversionError(*out_type_, " from ", *in->type());
+  }
+};
+
+// Converter to boolean: a null-typed input becomes an all-null boolean array of the
+// same length, a boolean input is passed through unchanged, and any other input type
+// is a conversion error.
+class BooleanConverter : public PrimitiveConverter {
+ public:
+  using PrimitiveConverter::PrimitiveConverter;
+
+  Status Convert(const std::shared_ptr<Array>& in, std::shared_ptr<Array>* out) override {
+    switch (in->type_id()) {
+      case Type::NA:
+        return MakeArrayOfNull(boolean(), in->length(), pool_).Value(out);
+      case Type::BOOL:
+        *out = in;
+        return Status::OK();
+      default:
+        return GenericConversionError(*out_type_, " from ", *in->type());
+    }
+  }
+};
+
+// Converter to the numeric Arrow type T.
+//
+// A null-typed input becomes an all-null array of the output type. Otherwise the
+// input is read as a dictionary of strings (see GetDictionaryArray) and every valid
+// entry is parsed with ParseValue; the first entry which fails to parse aborts the
+// conversion with an error.
+template <typename T>
+class NumericConverter : public PrimitiveConverter {
+ public:
+  using value_type = typename T::c_type;
+
+  NumericConverter(MemoryPool* pool, const std::shared_ptr<DataType>& type)
+      : PrimitiveConverter(pool, type), numeric_type_(checked_cast<const T&>(*type)) {}
+
+  Status Convert(const std::shared_ptr<Array>& in, std::shared_ptr<Array>* out) override {
+    if (in->type_id() == Type::NA) {
+      return MakeArrayOfNull(out_type_, in->length(), pool_).Value(out);
+    }
+    const auto& dict_array = GetDictionaryArray(in);
+
+    // Space for every row is reserved up front, so the Unsafe* appends below
+    // need no further capacity checks.
+    typename TypeTraits<T>::BuilderType builder(out_type_, pool_);
+    RETURN_NOT_OK(builder.Resize(dict_array.indices()->length()));
+
+    auto append_parsed = [&](string_view repr) -> Status {
+      value_type parsed;
+      if (arrow::internal::ParseValue(numeric_type_, repr.data(), repr.size(),
+                                      &parsed)) {
+        builder.UnsafeAppend(parsed);
+        return Status::OK();
+      }
+      return GenericConversionError(*out_type_, ", couldn't parse:", repr);
+    };
+
+    auto append_null = [&]() -> Status {
+      builder.UnsafeAppendNull();
+      return Status::OK();
+    };
+
+    RETURN_NOT_OK(VisitDictionaryEntries(dict_array, append_parsed, append_null));
+    return builder.Finish(out);
+  }
+
+  // The output type, downcast to T once at construction.
+  const T& numeric_type_;
+};
+
+// Converter to a date/time type. Values are first converted to the plain numeric
+// type sharing DateTimeType's C representation; the resulting array data is then
+// copied and relabelled with the requested output type.
+template <typename DateTimeType>
+class DateTimeConverter : public PrimitiveConverter {
+ public:
+  DateTimeConverter(MemoryPool* pool, const std::shared_ptr<DataType>& type)
+      : PrimitiveConverter(pool, type), converter_(pool, repr_type()) {}
+
+  Status Convert(const std::shared_ptr<Array>& in, std::shared_ptr<Array>* out) override {
+    if (in->type_id() == Type::NA) {
+      return MakeArrayOfNull(out_type_, in->length(), pool_).Value(out);
+    }
+
+    // Step 1: convert to the underlying numeric representation.
+    std::shared_ptr<Array> numeric;
+    RETURN_NOT_OK(converter_.Convert(in, &numeric));
+
+    // Step 2: same data, but typed as the requested date/time type.
+    auto relabelled = numeric->data()->Copy();
+    relabelled->type = out_type_;
+    *out = MakeArray(relabelled);
+
+    return Status::OK();
+  }
+
+ private:
+  // Arrow numeric type with the same C type as DateTimeType.
+  using ReprType = typename CTypeTraits<typename DateTimeType::c_type>::ArrowType;
+  static std::shared_ptr<DataType> repr_type() {
+    return TypeTraits<ReprType>::type_singleton();
+  }
+  NumericConverter<ReprType> converter_;
+};
+
+// Converter to the binary-like Arrow type T.
+//
+// A null-typed input becomes an all-null array of the output type. Otherwise the
+// input is read as a dictionary of strings (see GetDictionaryArray) and expanded
+// into a plain array in two passes: the first sums the value sizes so the data
+// buffer can be reserved once, the second appends the values.
+template <typename T>
+class BinaryConverter : public PrimitiveConverter {
+ public:
+  using PrimitiveConverter::PrimitiveConverter;
+
+  Status Convert(const std::shared_ptr<Array>& in, std::shared_ptr<Array>* out) override {
+    if (in->type_id() == Type::NA) {
+      return MakeArrayOfNull(out_type_, in->length(), pool_).Value(out);
+    }
+    const auto& dict_array = GetDictionaryArray(in);
+
+    typename TypeTraits<T>::BuilderType builder(out_type_, pool_);
+    RETURN_NOT_OK(builder.Resize(dict_array.indices()->length()));
+
+    // Pass 1: total number of value bytes.
+    // TODO(bkietz) this can be computed during parsing at low cost
+    int64_t total_bytes = 0;
+    auto add_size = [&](string_view value) -> Status {
+      total_bytes += value.size();
+      return Status::OK();
+    };
+    auto skip_null = [&]() -> Status {
+      // no-op
+      return Status::OK();
+    };
+    RETURN_NOT_OK(VisitDictionaryEntries(dict_array, add_size, skip_null));
+    RETURN_NOT_OK(builder.ReserveData(total_bytes));
+
+    // Pass 2: append; slots and data were reserved above.
+    auto append_value = [&](string_view value) -> Status {
+      builder.UnsafeAppend(value);
+      return Status::OK();
+    };
+    auto append_null = [&]() -> Status {
+      builder.UnsafeAppendNull();
+      return Status::OK();
+    };
+    RETURN_NOT_OK(VisitDictionaryEntries(dict_array, append_value, append_null));
+    return builder.Finish(out);
+  }
+};
+
+// Create the converter producing arrays of `out_type`.
+//
+// Supported outputs are null, boolean, the integer and floating point types,
+// timestamp, time32/time64, date32/date64 and the (large) binary/string types.
+// Any other type id yields Status::NotImplemented and leaves `*out` untouched.
+//
+// Note that timestamps go straight through NumericConverter<TimestampType>, whereas
+// the time and date types are routed through DateTimeConverter.
+Status MakeConverter(const std::shared_ptr<DataType>& out_type, MemoryPool* pool,
+ std::shared_ptr<Converter>* out) {
+ switch (out_type->id()) {
+// Expands to one `case` which instantiates CONVERTER_TYPE for TYPE_ID.
+#define CONVERTER_CASE(TYPE_ID, CONVERTER_TYPE) \
+ case TYPE_ID: \
+ *out = std::make_shared<CONVERTER_TYPE>(pool, out_type); \
+ break
+ CONVERTER_CASE(Type::NA, NullConverter);
+ CONVERTER_CASE(Type::BOOL, BooleanConverter);
+ CONVERTER_CASE(Type::INT8, NumericConverter<Int8Type>);
+ CONVERTER_CASE(Type::INT16, NumericConverter<Int16Type>);
+ CONVERTER_CASE(Type::INT32, NumericConverter<Int32Type>);
+ CONVERTER_CASE(Type::INT64, NumericConverter<Int64Type>);
+ CONVERTER_CASE(Type::UINT8, NumericConverter<UInt8Type>);
+ CONVERTER_CASE(Type::UINT16, NumericConverter<UInt16Type>);
+ CONVERTER_CASE(Type::UINT32, NumericConverter<UInt32Type>);
+ CONVERTER_CASE(Type::UINT64, NumericConverter<UInt64Type>);
+ CONVERTER_CASE(Type::FLOAT, NumericConverter<FloatType>);
+ CONVERTER_CASE(Type::DOUBLE, NumericConverter<DoubleType>);
+ CONVERTER_CASE(Type::TIMESTAMP, NumericConverter<TimestampType>);
+ CONVERTER_CASE(Type::TIME32, DateTimeConverter<Time32Type>);
+ CONVERTER_CASE(Type::TIME64, DateTimeConverter<Time64Type>);
+ CONVERTER_CASE(Type::DATE32, DateTimeConverter<Date32Type>);
+ CONVERTER_CASE(Type::DATE64, DateTimeConverter<Date64Type>);
+ CONVERTER_CASE(Type::BINARY, BinaryConverter<BinaryType>);
+ CONVERTER_CASE(Type::STRING, BinaryConverter<StringType>);
+ CONVERTER_CASE(Type::LARGE_BINARY, BinaryConverter<LargeBinaryType>);
+ CONVERTER_CASE(Type::LARGE_STRING, BinaryConverter<LargeStringType>);
+ default:
+ return Status::NotImplemented("JSON conversion to ", *out_type,
+ " is not supported");
+#undef CONVERTER_CASE
+ }
+ return Status::OK();
+}
+
+// Return the built-in promotion graph.
+//
+// The graph is a function-local static, so every call returns a pointer to the same
+// instance. Its rules, as implemented below:
+//   - first guess (Infer): null -> null, boolean -> boolean, number -> int64,
+//     string -> timestamp(second), array -> list of the inferred value type,
+//     object -> struct of the inferred field types;
+//   - on conversion failure (Promote): null -> Infer(field), timestamp -> utf8,
+//     int64 -> float64, anything else -> nullptr (no promotion available).
+const PromotionGraph* GetPromotionGraph() {
+ static struct : PromotionGraph {
+ // A nullable field of null type, tagged with the null kind.
+ std::shared_ptr<Field> Null(const std::string& name) const override {
+ return field(name, null(), true, Kind::Tag(Kind::kNull));
+ }
+
+ // Pick the initial type for a field based on the kind tag in its metadata.
+ std::shared_ptr<DataType> Infer(
+ const std::shared_ptr<Field>& unexpected_field) const override {
+ auto kind = Kind::FromTag(unexpected_field->metadata());
+ switch (kind) {
+ case Kind::kNull:
+ return null();
+
+ case Kind::kBoolean:
+ return boolean();
+
+ case Kind::kNumber:
+ return int64();
+
+ case Kind::kString:
+ // Strings are first tried as timestamps; Promote() falls back to utf8.
+ return timestamp(TimeUnit::SECOND);
+
+ case Kind::kArray: {
+ // Recurse into the list's value field.
+ const auto& type = checked_cast<const ListType&>(*unexpected_field->type());
+ auto value_field = type.value_field();
+ return list(value_field->WithType(Infer(value_field)));
+ }
+ case Kind::kObject: {
+ // Recurse into every child field.
+ auto fields = unexpected_field->type()->fields();
+ for (auto& field : fields) {
+ field = field->WithType(Infer(field));
+ }
+ return struct_(std::move(fields));
+ }
+ default:
+ return nullptr;
+ }
+ }
+
+ // Pick the next type to try after conversion to `failed` did not succeed;
+ // nullptr means there is nothing left to try.
+ std::shared_ptr<DataType> Promote(
+ const std::shared_ptr<DataType>& failed,
+ const std::shared_ptr<Field>& unexpected_field) const override {
+ switch (failed->id()) {
+ case Type::NA:
+ return Infer(unexpected_field);
+
+ case Type::TIMESTAMP:
+ return utf8();
+
+ case Type::INT64:
+ return float64();
+
+ default:
+ return nullptr;
+ }
+ }
+ } impl;
+
+ return &impl;
+}
+
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/converter.h b/contrib/libs/apache/arrow/cpp/src/arrow/json/converter.h
new file mode 100644
index 0000000000..9a812dd3c3
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/converter.h
@@ -0,0 +1,94 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "arrow/status.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+class DataType;
+class Field;
+class MemoryPool;
+
+namespace json {
+
+/// \brief interface for conversion of Arrays
+///
+/// Converters are not required to be correct for arbitrary input- only
+/// for unconverted arrays emitted by a corresponding parser.
+///
+/// A converter is created for one fixed output type and memory pool, both of which
+/// are stored at construction and exposed through out_type() and pool().
+class ARROW_EXPORT Converter {
+ public:
+ virtual ~Converter() = default;
+
+ /// convert an array
+ /// on failure, this converter may be promoted to another converter which
+ /// *can* convert the given input.
+ ///
+ /// \param[in] in the unconverted array
+ /// \param[out] out receives the converted array on success
+ virtual Status Convert(const std::shared_ptr<Array>& in,
+ std::shared_ptr<Array>* out) = 0;
+
+ /// the type this converter was created for
+ std::shared_ptr<DataType> out_type() const { return out_type_; }
+
+ /// the memory pool this converter was created with
+ MemoryPool* pool() { return pool_; }
+
+ protected:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(Converter);
+
+ // Only subclasses may construct a Converter.
+ Converter(MemoryPool* pool, const std::shared_ptr<DataType>& out_type)
+ : pool_(pool), out_type_(out_type) {}
+
+ MemoryPool* pool_;
+ std::shared_ptr<DataType> out_type_;
+};
+
+/// \brief produce a single converter to the specified out_type
+///
+/// \param[in] out_type the type the converter should produce
+/// \param[in] pool memory pool handed to the converter
+/// \param[out] out receives the new converter on success
+/// \return Status::NotImplemented if conversion to out_type is not supported
+ARROW_EXPORT Status MakeConverter(const std::shared_ptr<DataType>& out_type,
+ MemoryPool* pool, std::shared_ptr<Converter>* out);
+
+/// \brief interface describing which types to try for a field, and in which order
+///
+/// Infer() supplies the first candidate type for a field; Promote() supplies the
+/// next candidate after conversion to a previous one has failed.
+class ARROW_EXPORT PromotionGraph {
+ public:
+ virtual ~PromotionGraph() = default;
+
+ /// \brief produce a valid field which will be inferred as null
+ virtual std::shared_ptr<Field> Null(const std::string& name) const = 0;
+
+ /// \brief given an unexpected field encountered during parsing, return a type to which
+ /// it may be convertible (may return null if none is available)
+ virtual std::shared_ptr<DataType> Infer(
+ const std::shared_ptr<Field>& unexpected_field) const = 0;
+
+ /// \brief given a type to which conversion failed, return a promoted type to which
+ /// conversion may succeed (may return null if none is available)
+ virtual std::shared_ptr<DataType> Promote(
+ const std::shared_ptr<DataType>& failed,
+ const std::shared_ptr<Field>& unexpected_field) const = 0;
+
+ protected:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(PromotionGraph);
+ // Only subclasses may construct a PromotionGraph.
+ PromotionGraph() = default;
+};
+
+/// \brief return the built-in promotion graph
+///
+/// The returned pointer refers to a static instance; callers must not delete it.
+ARROW_EXPORT const PromotionGraph* GetPromotionGraph();
+
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/object_parser.cc b/contrib/libs/apache/arrow/cpp/src/arrow/json/object_parser.cc
new file mode 100644
index 0000000000..c857cd537e
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/object_parser.cc
@@ -0,0 +1,83 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/json/object_parser.h"
+#include "arrow/json/rapidjson_defs.h" // IWYU pragma: keep
+
+#include <rapidjson/document.h>
+
+namespace arrow {
+namespace json {
+namespace internal {
+
+namespace rj = arrow::rapidjson;
+
+// Implementation of ObjectParser on top of a rapidjson Document.
+class ObjectParser::Impl {
+ public:
+  // Parse `json` into the document.
+  //
+  // Returns Invalid (with the error offset and rapidjson error code) if the text is
+  // not valid JSON, and TypeError if the top-level value is not an object.
+  Status Parse(arrow::util::string_view json) {
+    document_.Parse(reinterpret_cast<const rj::Document::Ch*>(json.data()),
+                    static_cast<size_t>(json.size()));
+
+    if (document_.HasParseError()) {
+      return Status::Invalid("Json parse error (offset ", document_.GetErrorOffset(),
+                             "): ", document_.GetParseError());
+    }
+    if (!document_.IsObject()) {
+      return Status::TypeError("Not a json object");
+    }
+    return Status::OK();
+  }
+
+  // Return the string stored under `key`.
+  //
+  // Returns KeyError if the key is absent and TypeError if its value is not a
+  // string. The key is looked up once, and the result is built from the value's
+  // explicit length so that strings containing "\u0000" are not truncated.
+  Result<std::string> GetString(const char* key) const {
+    const auto member = document_.FindMember(key);
+    if (member == document_.MemberEnd()) {
+      return Status::KeyError("Key '", key, "' does not exist");
+    }
+    const auto& value = member->value;
+    if (!value.IsString()) {
+      return Status::TypeError("Key '", key, "' is not a string");
+    }
+    return std::string(value.GetString(), value.GetStringLength());
+  }
+
+  // Return the boolean stored under `key`.
+  //
+  // Returns KeyError if the key is absent and TypeError if its value is not a
+  // boolean. The key is looked up once.
+  Result<bool> GetBool(const char* key) const {
+    const auto member = document_.FindMember(key);
+    if (member == document_.MemberEnd()) {
+      return Status::KeyError("Key '", key, "' does not exist");
+    }
+    const auto& value = member->value;
+    if (!value.IsBool()) {
+      return Status::TypeError("Key '", key, "' is not a boolean");
+    }
+    return value.GetBool();
+  }
+
+ private:
+  rj::Document document_;
+};
+
+// ObjectParser's public members: each one forwards to the Impl owned by impl_.
+ObjectParser::ObjectParser() : impl_(new ObjectParser::Impl()) {}
+
+// Defined here, where Impl is a complete type.
+ObjectParser::~ObjectParser() = default;
+
+Status ObjectParser::Parse(arrow::util::string_view json) { return impl_->Parse(json); }
+
+Result<std::string> ObjectParser::GetString(const char* key) const {
+ return impl_->GetString(key);
+}
+
+Result<bool> ObjectParser::GetBool(const char* key) const { return impl_->GetBool(key); }
+
+} // namespace internal
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/object_parser.h b/contrib/libs/apache/arrow/cpp/src/arrow/json/object_parser.h
new file mode 100644
index 0000000000..ef93201651
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/object_parser.h
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "arrow/result.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace json {
+namespace internal {
+
+/// This class is a helper to parse a json object from a string.
+/// It uses rapidjson::Document in implementation.
+///
+/// Call Parse() first; the getters then read values from the parsed object.
+class ARROW_EXPORT ObjectParser {
+ public:
+ ObjectParser();
+ ~ObjectParser();
+
+ /// Parse the given text, which must contain a single json object.
+ Status Parse(arrow::util::string_view json);
+
+ /// Return the string value stored under `key`, or an error if the key is
+ /// missing or its value is not a string.
+ Result<std::string> GetString(const char* key) const;
+ /// Return the boolean value stored under `key`, or an error if the key is
+ /// missing or its value is not a boolean.
+ Result<bool> GetBool(const char* key) const;
+
+ private:
+ // The rapidjson-based implementation is kept out of this header.
+ class Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+} // namespace internal
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/object_writer.cc b/contrib/libs/apache/arrow/cpp/src/arrow/json/object_writer.cc
new file mode 100644
index 0000000000..06d09f81e9
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/object_writer.cc
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/json/object_writer.h"
+#include "arrow/json/rapidjson_defs.h" // IWYU pragma: keep
+
+#include <rapidjson/document.h>
+#include <rapidjson/stringbuffer.h>
+#include <rapidjson/writer.h>
+
+namespace rj = arrow::rapidjson;
+
+namespace arrow {
+namespace json {
+namespace internal {
+
+// Implementation of ObjectWriter: members are accumulated in a rapidjson object
+// value (root_) whose strings are allocated from document_'s allocator.
+class ObjectWriter::Impl {
+ public:
+  Impl() : root_(rj::kObjectType) {}
+
+  // Add the member `key`: `value` (a json string) to the object.
+  //
+  // Both strings are copied using their explicit length: a string_view is not
+  // guaranteed to be NUL-terminated, so it must not be handed to rapidjson as a
+  // C string.
+  void SetString(arrow::util::string_view key, arrow::util::string_view value) {
+    rj::Document::AllocatorType& allocator = document_.GetAllocator();
+
+    rj::Value str_key(DataOrEmpty(key), static_cast<rj::SizeType>(key.size()),
+                      allocator);
+    rj::Value str_value(DataOrEmpty(value), static_cast<rj::SizeType>(value.size()),
+                        allocator);
+
+    root_.AddMember(str_key, str_value, allocator);
+  }
+
+  // Add the member `key`: `value` (a json boolean) to the object.
+  // The key is copied using its explicit length, as in SetString.
+  void SetBool(arrow::util::string_view key, bool value) {
+    rj::Document::AllocatorType& allocator = document_.GetAllocator();
+
+    rj::Value str_key(DataOrEmpty(key), static_cast<rj::SizeType>(key.size()),
+                      allocator);
+
+    root_.AddMember(str_key, value, allocator);
+  }
+
+  // Serialize the object built so far to json text.
+  std::string Serialize() {
+    rj::StringBuffer buffer;
+    rj::Writer<rj::StringBuffer> writer(buffer);
+    root_.Accept(writer);
+
+    return buffer.GetString();
+  }
+
+ private:
+  // An empty view may carry a null data pointer; give rapidjson a valid pointer
+  // in that case.
+  static const char* DataOrEmpty(arrow::util::string_view view) {
+    return view.empty() ? "" : view.data();
+  }
+
+  rj::Document document_;
+  rj::Value root_;
+};
+
+// ObjectWriter's public members: each one forwards to the Impl owned by impl_.
+ObjectWriter::ObjectWriter() : impl_(new ObjectWriter::Impl()) {}
+
+// Defined here, where Impl is a complete type.
+ObjectWriter::~ObjectWriter() = default;
+
+void ObjectWriter::SetString(arrow::util::string_view key,
+ arrow::util::string_view value) {
+ impl_->SetString(key, value);
+}
+
+void ObjectWriter::SetBool(arrow::util::string_view key, bool value) {
+ impl_->SetBool(key, value);
+}
+
+std::string ObjectWriter::Serialize() { return impl_->Serialize(); }
+
+} // namespace internal
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/object_writer.h b/contrib/libs/apache/arrow/cpp/src/arrow/json/object_writer.h
new file mode 100644
index 0000000000..55ff0ce52b
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/object_writer.h
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "arrow/util/string_view.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace json {
+namespace internal {
+
+/// \brief Helper that collects string/boolean members into one JSON object and
+/// serializes it to a string. The implementation (hidden in Impl) uses rapidjson.
+class ARROW_EXPORT ObjectWriter {
+ public:
+ ObjectWriter();
+ ~ObjectWriter();
+ /// Add a member named `key` to the object, holding a string or boolean value.
+ void SetString(arrow::util::string_view key, arrow::util::string_view value);
+ void SetBool(arrow::util::string_view key, bool value);
+ /// Return the JSON text of the object built so far.
+ std::string Serialize();
+ // Impl is only forward-declared here; its definition lives in the .cc file.
+ private:
+ class Impl;
+ std::unique_ptr<Impl> impl_;
+};
+
+} // namespace internal
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/options.cc b/contrib/libs/apache/arrow/cpp/src/arrow/json/options.cc
new file mode 100644
index 0000000000..dc5e628b1f
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/options.cc
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/json/options.h"
+
+namespace arrow {
+namespace json {
+
+ParseOptions ParseOptions::Defaults() {
+  // An empty-initialized ParseOptions carries the in-class default values.
+  ParseOptions defaults{};
+  return defaults;
+}
+
+ReadOptions ReadOptions::Defaults() {
+  // An empty-initialized ReadOptions carries the in-class default values.
+  ReadOptions defaults{};
+  return defaults;
+}
+
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/options.h b/contrib/libs/apache/arrow/cpp/src/arrow/json/options.h
new file mode 100644
index 0000000000..d7edab9ced
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/options.h
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "arrow/json/type_fwd.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class DataType;
+class Schema;
+
+namespace json {
+
+enum class UnexpectedFieldBehavior : char {
+ /// JSON fields that are not in the expected schema are ignored
+ Ignore,
+ /// JSON fields that are not in the expected schema cause a parse error
+ Error,
+ /// JSON fields that are not in the expected schema are type-inferred and included in the output
+ InferType
+};
+
+struct ARROW_EXPORT ParseOptions {
+ // Options controlling how JSON text is parsed
+
+ /// Optional explicit schema (disables type inference on the fields it names); null by default
+ std::shared_ptr<Schema> explicit_schema;
+
+ /// Whether a single JSON object may span multiple lines (for example when pretty-printed)
+ ///
+ /// If true, parsing may be slower. Defaults to false.
+ bool newlines_in_values = false;
+
+ /// How JSON fields outside of explicit_schema (if given) are treated; defaults to InferType
+ UnexpectedFieldBehavior unexpected_field_behavior = UnexpectedFieldBehavior::InferType;
+
+ /// Create parsing options with the default values declared above
+ static ParseOptions Defaults();
+};
+
+struct ARROW_EXPORT ReadOptions {
+ // Options controlling how input is read
+
+ /// Whether to use the global CPU thread pool; defaults to true
+ bool use_threads = true;
+ /// Block size we request from the IO layer; also determines the size of
+ /// chunks when use_threads is true
+ int32_t block_size = 1 << 20; // 2^20 = 1048576 (1 MiB, presumably in bytes)
+
+ /// Create read options with the default values declared above
+ static ReadOptions Defaults();
+};
+
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/parser.cc b/contrib/libs/apache/arrow/cpp/src/arrow/json/parser.cc
new file mode 100644
index 0000000000..05f155645a
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/parser.cc
@@ -0,0 +1,1099 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/json/parser.h"
+
+#include <functional>
+#include <limits>
+#include <tuple>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "arrow/json/rapidjson_defs.h"
+#include "rapidjson/error/en.h"
+#include "rapidjson/reader.h"
+
+#include "arrow/array.h"
+#include "arrow/array/builder_binary.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/type.h"
+#include "arrow/util/bitset_stack.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/make_unique.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/trie.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::BitsetStack;
+using internal::checked_cast;
+using internal::make_unique;
+using util::string_view;
+
+namespace json {
+
+namespace rj = arrow::rapidjson;
+
+template <typename... T>
+static Status ParseError(T&&... t) {
+ return Status::Invalid("JSON parse error: ", std::forward<T>(t)...);
+}
+
+/// Return the lowercase JSON name of `kind`.
+///
+/// The table is indexed directly by the numeric value of `kind`; no bounds
+/// check is performed.
+const std::string& Kind::Name(Kind::type kind) {
+  static const std::string kKindNames[] = {
+      "null", "boolean", "number", "string", "array", "object",
+  };
+  const std::string& kind_name = kKindNames[kind];
+  return kind_name;
+}
+
+/// Return the metadata tag ({"json_kind": <name of kind>}) for `kind`.
+///
+/// The tags are built once, on first use, and indexed by the numeric value of
+/// `kind`.
+const std::shared_ptr<const KeyValueMetadata>& Kind::Tag(Kind::type kind) {
+  const auto make_tag = [](Kind::type k) -> std::shared_ptr<const KeyValueMetadata> {
+    return key_value_metadata({{"json_kind", Kind::Name(k)}});
+  };
+  static const std::shared_ptr<const KeyValueMetadata> kKindTags[] = {
+      make_tag(Kind::kNull),   make_tag(Kind::kBoolean), make_tag(Kind::kNumber),
+      make_tag(Kind::kString), make_tag(Kind::kArray),   make_tag(Kind::kObject),
+  };
+  return kKindTags[kind];
+}
+
+/// Build a trie containing every kind name, appended in the order
+/// null, boolean, number, string, array, object.
+static arrow::internal::Trie MakeFromTagTrie() {
+  const Kind::type all_kinds[] = {Kind::kNull,   Kind::kBoolean, Kind::kNumber,
+                                  Kind::kString, Kind::kArray,   Kind::kObject};
+
+  arrow::internal::TrieBuilder trie_builder;
+  for (const Kind::type k : all_kinds) {
+    DCHECK_OK(trie_builder.Append(Kind::Name(k)));
+  }
+
+  auto trie = trie_builder.Finish();
+  DCHECK_OK(trie.Validate());
+  return trie;
+}
+
+/// Recover the Kind stored under the "json_kind" key of `tag`.
+///
+/// In debug builds, a missing key or an unknown kind name trips a DCHECK; no
+/// check is made in release builds.
+Kind::type Kind::FromTag(const std::shared_ptr<const KeyValueMetadata>& tag) {
+  static arrow::internal::Trie name_to_kind = MakeFromTagTrie();
+
+  const auto key_index = tag->FindKey("json_kind");
+  DCHECK_NE(key_index, -1);
+
+  util::string_view name = tag->value(key_index);
+  const auto kind_index = name_to_kind.Find(name);
+  DCHECK_NE(kind_index, -1);
+
+  // The trie result is cast straight to the enum value.
+  return static_cast<Kind::type>(kind_index);
+}
+
+/// Determine which JSON kind is used to parse values of Arrow type `type`.
+///
+/// On success the kind is written to `*kind`. Dictionary types resolve to the
+/// kind of their value type; types without a matching overload below yield
+/// Status::NotImplemented.
+Status Kind::ForType(const DataType& type, Kind::type* kind) {
+  struct KindVisitor {
+    // destination for the resolved kind
+    Kind::type* out_;
+
+    Status Assign(Kind::type resolved) {
+      *out_ = resolved;
+      return Status::OK();
+    }
+
+    Status Visit(const NullType&) { return Assign(Kind::kNull); }
+    Status Visit(const BooleanType&) { return Assign(Kind::kBoolean); }
+    Status Visit(const NumberType&) { return Assign(Kind::kNumber); }
+    Status Visit(const TimeType&) { return Assign(Kind::kNumber); }
+    Status Visit(const DateType&) { return Assign(Kind::kNumber); }
+    Status Visit(const BinaryType&) { return Assign(Kind::kString); }
+    Status Visit(const FixedSizeBinaryType&) { return Assign(Kind::kString); }
+    Status Visit(const DictionaryType& dict_type) {
+      // a dictionary is parsed like its values
+      return Kind::ForType(*dict_type.value_type(), out_);
+    }
+    Status Visit(const ListType&) { return Assign(Kind::kArray); }
+    Status Visit(const StructType&) { return Assign(Kind::kObject); }
+    Status Visit(const DataType& not_impl) {
+      return Status::NotImplemented("JSON parsing of ", not_impl);
+    }
+  };
+
+  KindVisitor kind_visitor{kind};
+  return VisitTypeInline(type, &kind_visitor);
+}
+
+/// \brief ArrayBuilder for parsed but unconverted arrays
+template <Kind::type>
+class RawArrayBuilder;
+
+/// \brief compact handle to a RawArrayBuilder
+///
+/// RawArrayBuilders live in arenas owned by HandlerBase, so a handle only
+/// needs the builder's index (uint32_t) rather than a full pointer.
+/// The handle additionally carries the json kind and the nullable flag,
+/// making both readable without dereferencing the builder.
+struct BuilderPtr {
+  /// Default-constructed handles are copies of BuilderPtr::null
+  BuilderPtr() : BuilderPtr(BuilderPtr::null) {}
+  BuilderPtr(Kind::type builder_kind, uint32_t builder_index, bool is_nullable)
+      : index(builder_index), kind(builder_kind), nullable(is_nullable) {}
+
+  BuilderPtr(const BuilderPtr&) = default;
+  BuilderPtr& operator=(const BuilderPtr&) = default;
+  BuilderPtr(BuilderPtr&&) = default;
+  BuilderPtr& operator=(BuilderPtr&&) = default;
+
+  // position of the builder inside the arena for its kind,
+  // OR, when kind == Kind::kNull, the length of that builder
+  // (nulls are trivial, so no arena is allocated for them)
+  uint32_t index;
+  Kind::type kind;
+  bool nullable;
+
+  /// Handles compare equal when kind and index match; `nullable` is ignored
+  bool operator==(BuilderPtr other) const {
+    return index == other.index && kind == other.kind;
+  }
+
+  bool operator!=(BuilderPtr other) const { return !(*this == other); }
+
+  /// True for every handle that differs from BuilderPtr::null
+  operator bool() const { return !(*this == null); }
+
+  bool operator!() const { return *this == null; }
+
+  // The shared handle representing null type data
+  static const BuilderPtr null;
+};
+
+const BuilderPtr BuilderPtr::null(Kind::kNull, 0, true);
+
+/// \brief raw builder for JSON booleans
+///
+/// Keeps two bit-packed buffers of equal length: the values and the validity
+/// bitmap. Null slots store `false` as their value.
+template <>
+class RawArrayBuilder<Kind::kBoolean> {
+ public:
+  explicit RawArrayBuilder(MemoryPool* pool)
+      : data_builder_(pool), null_bitmap_builder_(pool) {}
+
+  /// Append one valid boolean
+  Status Append(bool value) {
+    RETURN_NOT_OK(data_builder_.Append(value));
+    RETURN_NOT_OK(null_bitmap_builder_.Append(true));
+    return Status::OK();
+  }
+
+  /// Append a single null slot
+  Status AppendNull() {
+    RETURN_NOT_OK(data_builder_.Append(false));
+    RETURN_NOT_OK(null_bitmap_builder_.Append(false));
+    return Status::OK();
+  }
+
+  /// Append `count` null slots
+  Status AppendNull(int64_t count) {
+    RETURN_NOT_OK(data_builder_.Append(count, false));
+    RETURN_NOT_OK(null_bitmap_builder_.Append(count, false));
+    return Status::OK();
+  }
+
+  /// Assemble the accumulated buffers into a boolean Array
+  Status Finish(std::shared_ptr<Array>* out) {
+    // Read the counters before the buffer builders are finished
+    // (same order as always; presumably Finish() resets them).
+    const auto num_slots = length();
+    const auto num_nulls = null_bitmap_builder_.false_count();
+
+    std::shared_ptr<Buffer> values;
+    std::shared_ptr<Buffer> validity;
+    RETURN_NOT_OK(data_builder_.Finish(&values));
+    RETURN_NOT_OK(null_bitmap_builder_.Finish(&validity));
+
+    auto array_data = ArrayData::Make(boolean(), num_slots, {validity, values}, num_nulls);
+    *out = MakeArray(array_data);
+    return Status::OK();
+  }
+
+  /// Number of slots appended so far (valid and null)
+  int64_t length() { return null_bitmap_builder_.length(); }
+
+ private:
+  TypedBufferBuilder<bool> data_builder_;
+  TypedBufferBuilder<bool> null_bitmap_builder_;
+};
+
+/// \brief builder shared by strings and unconverted numbers
+///
+/// Neither strings nor numbers are stored here directly. Each slot holds an
+/// int32 index into a single StringArray that owns the characters. Building
+/// is therefore cheap (no per-value character allocation) at the price of
+/// strided access later on.
+///
+/// When parsing completes, the indices and the character storage are joined
+/// into a dictionary-encoded array, a natural container for indices that
+/// refer into another array.
+class ScalarBuilder {
+ public:
+  explicit ScalarBuilder(MemoryPool* pool)
+      : values_length_(0), data_builder_(pool), null_bitmap_builder_(pool) {}
+
+  /// Append a valid slot referring to entry `index` of the character storage;
+  /// `value_length` is added to the running total of value lengths
+  Status Append(int32_t index, int32_t value_length) {
+    RETURN_NOT_OK(data_builder_.Append(index));
+    values_length_ += value_length;
+    RETURN_NOT_OK(null_bitmap_builder_.Append(true));
+    return Status::OK();
+  }
+
+  /// Append a single null slot (stored with index 0)
+  Status AppendNull() {
+    RETURN_NOT_OK(data_builder_.Append(0));
+    RETURN_NOT_OK(null_bitmap_builder_.Append(false));
+    return Status::OK();
+  }
+
+  /// Append `count` null slots (each stored with index 0)
+  Status AppendNull(int64_t count) {
+    RETURN_NOT_OK(data_builder_.Append(count, 0));
+    RETURN_NOT_OK(null_bitmap_builder_.Append(count, false));
+    return Status::OK();
+  }
+
+  /// Assemble the accumulated indices into an int32 Array
+  Status Finish(std::shared_ptr<Array>* out) {
+    // Read the counters before the buffer builders are finished
+    // (same order as always; presumably Finish() resets them).
+    const auto num_slots = length();
+    const auto num_nulls = null_bitmap_builder_.false_count();
+
+    std::shared_ptr<Buffer> indices;
+    std::shared_ptr<Buffer> validity;
+    RETURN_NOT_OK(data_builder_.Finish(&indices));
+    RETURN_NOT_OK(null_bitmap_builder_.Finish(&validity));
+
+    auto array_data = ArrayData::Make(int32(), num_slots, {validity, indices}, num_nulls);
+    *out = MakeArray(array_data);
+    return Status::OK();
+  }
+
+  /// Number of slots appended so far (valid and null)
+  int64_t length() { return null_bitmap_builder_.length(); }
+
+  /// Sum of the `value_length` arguments passed to Append()
+  int32_t values_length() { return values_length_; }
+
+ private:
+  int32_t values_length_;
+  TypedBufferBuilder<int32_t> data_builder_;
+  TypedBufferBuilder<bool> null_bitmap_builder_;
+};
+
+template <>
+class RawArrayBuilder<Kind::kNumber> : public ScalarBuilder {  // numbers are kept as unconverted text, see ScalarBuilder
+ public:
+ using ScalarBuilder::ScalarBuilder;  // inherit the MemoryPool* constructor
+};
+
+template <>
+class RawArrayBuilder<Kind::kString> : public ScalarBuilder {  // strings are stored by index, see ScalarBuilder
+ public:
+ using ScalarBuilder::ScalarBuilder;  // inherit the MemoryPool* constructor
+};
+
+/// \brief raw builder for JSON arrays
+///
+/// Records int32 offsets and a validity bitmap; the elements themselves are
+/// accumulated by a separate child builder referenced through value_builder().
+template <>
+class RawArrayBuilder<Kind::kArray> {
+ public:
+  explicit RawArrayBuilder(MemoryPool* pool)
+      : offset_builder_(pool), null_bitmap_builder_(pool) {}
+
+  /// Append a valid list slot containing `child_length` elements
+  Status Append(int32_t child_length) {
+    // the slot starts at the current running offset
+    RETURN_NOT_OK(offset_builder_.Append(offset_));
+    offset_ += child_length;
+    RETURN_NOT_OK(null_bitmap_builder_.Append(true));
+    return Status::OK();
+  }
+
+  /// Append a single null slot (empty range, offset unchanged)
+  Status AppendNull() {
+    RETURN_NOT_OK(offset_builder_.Append(offset_));
+    RETURN_NOT_OK(null_bitmap_builder_.Append(false));
+    return Status::OK();
+  }
+
+  /// Append `count` null slots (all sharing the current offset)
+  Status AppendNull(int64_t count) {
+    RETURN_NOT_OK(offset_builder_.Append(count, offset_));
+    RETURN_NOT_OK(null_bitmap_builder_.Append(count, false));
+    return Status::OK();
+  }
+
+  /// Assemble a list Array; `finish_child` is invoked to finish the value builder
+  Status Finish(std::function<Status(BuilderPtr, std::shared_ptr<Array>*)> finish_child,
+                std::shared_ptr<Array>* out) {
+    // write the closing offset
+    RETURN_NOT_OK(offset_builder_.Append(offset_));
+
+    const auto num_slots = length();
+    const auto num_nulls = null_bitmap_builder_.false_count();
+
+    std::shared_ptr<Buffer> offsets;
+    std::shared_ptr<Buffer> validity;
+    RETURN_NOT_OK(offset_builder_.Finish(&offsets));
+    RETURN_NOT_OK(null_bitmap_builder_.Finish(&validity));
+
+    std::shared_ptr<Array> values;
+    RETURN_NOT_OK(finish_child(value_builder_, &values));
+
+    // the item field is tagged with the json kind of the child builder
+    auto list_type = list(field("item", values->type(), value_builder_.nullable,
+                                Kind::Tag(value_builder_.kind)));
+    auto array_data = ArrayData::Make(list_type, num_slots, {validity, offsets},
+                                      {values->data()}, num_nulls);
+    *out = MakeArray(array_data);
+    return Status::OK();
+  }
+
+  /// Handle of the builder accumulating the list elements
+  BuilderPtr value_builder() const { return value_builder_; }
+
+  /// Replace the handle of the element builder
+  void value_builder(BuilderPtr builder) { value_builder_ = builder; }
+
+  /// Number of list slots appended so far (valid and null)
+  int64_t length() { return null_bitmap_builder_.length(); }
+
+ private:
+  BuilderPtr value_builder_ = BuilderPtr::null;
+  int32_t offset_ = 0;
+  TypedBufferBuilder<int32_t> offset_builder_;
+  TypedBufferBuilder<bool> null_bitmap_builder_;
+};
+
+/// \brief raw builder for JSON objects
+///
+/// Stores a validity bitmap plus one BuilderPtr per field. Field positions are
+/// assigned in insertion order and can be looked up by name.
+template <>
+class RawArrayBuilder<Kind::kObject> {
+ public:
+  explicit RawArrayBuilder(MemoryPool* pool) : null_bitmap_builder_(pool) {}
+
+  /// Append a valid object slot
+  Status Append() { return null_bitmap_builder_.Append(true); }
+
+  /// Append a single null slot
+  Status AppendNull() { return null_bitmap_builder_.Append(false); }
+
+  /// Append `count` null slots
+  Status AppendNull(int64_t count) { return null_bitmap_builder_.Append(count, false); }
+
+  /// Name of the field at position `i`, or "" when no field has that position
+  /// (linear scan over the name map)
+  std::string FieldName(int i) const {
+    for (auto it = name_to_index_.begin(); it != name_to_index_.end(); ++it) {
+      if (it->second == i) {
+        return it->first;
+      }
+    }
+    return "";
+  }
+
+  /// Position of the field called `name`, or -1 when there is none
+  int GetFieldIndex(const std::string& name) const {
+    const auto found = name_to_index_.find(name);
+    return found != name_to_index_.end() ? found->second : -1;
+  }
+
+  /// Register a new field and return the position assigned to it
+  int AddField(std::string name, BuilderPtr builder) {
+    // the new field takes the next free position
+    const int new_index = num_fields();
+    field_builders_.push_back(builder);
+    name_to_index_.emplace(std::move(name), new_index);
+    return new_index;
+  }
+
+  int num_fields() const { return static_cast<int>(field_builders_.size()); }
+
+  /// Handle of the builder for the field at `index`
+  BuilderPtr field_builder(int index) const { return field_builders_[index]; }
+
+  /// Replace the handle of the builder for the field at `index`
+  void field_builder(int index, BuilderPtr builder) { field_builders_[index] = builder; }
+
+  /// Assemble a struct Array; `finish_child` is invoked once per field builder
+  Status Finish(std::function<Status(BuilderPtr, std::shared_ptr<Array>*)> finish_child,
+                std::shared_ptr<Array>* out) {
+    const auto num_slots = length();
+    const auto num_nulls = null_bitmap_builder_.false_count();
+    std::shared_ptr<Buffer> validity;
+    RETURN_NOT_OK(null_bitmap_builder_.Finish(&validity));
+
+    // invert the name -> position map so names can be read by position
+    const int field_count = num_fields();
+    std::vector<string_view> names_by_index(field_count);
+    for (const auto& entry : name_to_index_) {
+      names_by_index[entry.second] = entry.first;
+    }
+
+    std::vector<std::shared_ptr<Field>> fields(field_count);
+    std::vector<std::shared_ptr<ArrayData>> child_data(field_count);
+    for (int i = 0; i < field_count; ++i) {
+      const BuilderPtr child = field_builders_[i];
+      std::shared_ptr<Array> child_values;
+      RETURN_NOT_OK(finish_child(child, &child_values));
+      child_data[i] = child_values->data();
+      // each field is tagged with the json kind of its builder
+      fields[i] = field(std::string(names_by_index[i]), child_values->type(),
+                        child.nullable, Kind::Tag(child.kind));
+    }
+
+    auto array_data = ArrayData::Make(struct_(std::move(fields)), num_slots, {validity},
+                                      std::move(child_data), num_nulls);
+    *out = MakeArray(array_data);
+    return Status::OK();
+  }
+
+  /// Number of object slots appended so far (valid and null)
+  int64_t length() { return null_bitmap_builder_.length(); }
+
+ private:
+  std::vector<BuilderPtr> field_builders_;
+  std::unordered_map<std::string, int> name_to_index_;
+  TypedBufferBuilder<bool> null_bitmap_builder_;
+};
+
+class RawBuilderSet {  // owns one arena (vector) of RawArrayBuilders per non-null Kind
+ public:
+ explicit RawBuilderSet(MemoryPool* pool) : pool_(pool) {}
+
+ /// Resolve a BuilderPtr to the builder it indexes in the arena for `kind`
+ template <Kind::type kind>
+ enable_if_t<kind != Kind::kNull, RawArrayBuilder<kind>*> Cast(BuilderPtr builder) {
+ DCHECK_EQ(builder.kind, kind);
+ return arena<kind>().data() + builder.index;
+ }
+
+ /// Construct a nullable builder of statically known kind, pre-filled with `leading_nulls` nulls
+ template <Kind::type kind>
+ Status MakeBuilder(int64_t leading_nulls, BuilderPtr* builder) {
+ builder->index = static_cast<uint32_t>(arena<kind>().size());
+ builder->kind = kind;
+ builder->nullable = true;
+ arena<kind>().emplace_back(RawArrayBuilder<kind>(pool_));
+ return Cast<kind>(*builder)->AppendNull(leading_nulls);
+ }
+
+ /// Construct a builder (and, recursively, its children) of whatever kind corresponds to a DataType
+ Status MakeBuilder(const DataType& t, int64_t leading_nulls, BuilderPtr* builder) {
+ Kind::type kind;
+ RETURN_NOT_OK(Kind::ForType(t, &kind));
+ switch (kind) {
+ case Kind::kNull:
+ *builder = BuilderPtr(Kind::kNull, static_cast<uint32_t>(leading_nulls), true);
+ return Status::OK();
+
+ case Kind::kBoolean:
+ return MakeBuilder<Kind::kBoolean>(leading_nulls, builder);
+
+ case Kind::kNumber:
+ return MakeBuilder<Kind::kNumber>(leading_nulls, builder);
+
+ case Kind::kString:
+ return MakeBuilder<Kind::kString>(leading_nulls, builder);
+
+ case Kind::kArray: {
+ RETURN_NOT_OK(MakeBuilder<Kind::kArray>(leading_nulls, builder));
+ const auto& list_type = checked_cast<const ListType&>(t);
+ // the element builder starts empty: leading null lists contain no elements
+ BuilderPtr value_builder;
+ RETURN_NOT_OK(MakeBuilder(*list_type.value_type(), 0, &value_builder));
+ value_builder.nullable = list_type.value_field()->nullable();
+ // look the list builder up again: the recursive call may have grown the arena
+ Cast<Kind::kArray>(*builder)->value_builder(value_builder);
+ return Status::OK();
+ }
+ case Kind::kObject: {
+ RETURN_NOT_OK(MakeBuilder<Kind::kObject>(leading_nulls, builder));
+ const auto& struct_type = checked_cast<const StructType&>(t);
+ // every field builder receives the same number of leading nulls as the struct
+ for (const auto& f : struct_type.fields()) {
+ BuilderPtr field_builder;
+ RETURN_NOT_OK(MakeBuilder(*f->type(), leading_nulls, &field_builder));
+ field_builder.nullable = f->nullable();
+ // look the struct builder up again: the recursive call may have grown the arena
+ Cast<Kind::kObject>(*builder)->AddField(f->name(), field_builder);
+ }
+ return Status::OK();
+ }
+ default:
+ return Status::NotImplemented("invalid builder type");
+ }
+ }
+
+ /// Append one null to `builder`. For Kind::kNull the count is stored inline in the
+ /// BuilderPtr, so the copy held by `parent` (at `field_index` for objects) is updated
+ Status AppendNull(BuilderPtr parent, int field_index, BuilderPtr builder) {
+ if (ARROW_PREDICT_FALSE(!builder.nullable)) {
+ return ParseError("a required field was null");
+ }
+ switch (builder.kind) {
+ case Kind::kNull: {
+ DCHECK_EQ(builder, parent.kind == Kind::kArray
+ ? Cast<Kind::kArray>(parent)->value_builder()
+ : Cast<Kind::kObject>(parent)->field_builder(field_index));
+
+ // increment the null count stored inline in the handle
+ builder.index += 1;
+
+ // write the modified handle back: `builder` is only a local copy
+ if (parent.kind == Kind::kArray) {
+ Cast<Kind::kArray>(parent)->value_builder(builder);
+ } else {
+ Cast<Kind::kObject>(parent)->field_builder(field_index, builder);
+ }
+ return Status::OK();
+ }
+ case Kind::kBoolean:
+ return Cast<Kind::kBoolean>(builder)->AppendNull();
+
+ case Kind::kNumber:
+ return Cast<Kind::kNumber>(builder)->AppendNull();
+
+ case Kind::kString:
+ return Cast<Kind::kString>(builder)->AppendNull();
+
+ case Kind::kArray:
+ return Cast<Kind::kArray>(builder)->AppendNull();
+
+ case Kind::kObject: {
+ auto struct_builder = Cast<Kind::kObject>(builder);
+ RETURN_NOT_OK(struct_builder->AppendNull());
+ // a null struct also appends a null to each of its fields
+ for (int i = 0; i < struct_builder->num_fields(); ++i) {
+ auto field_builder = struct_builder->field_builder(i);
+ RETURN_NOT_OK(AppendNull(builder, i, field_builder));
+ }
+ return Status::OK();
+ }
+ default:
+ return Status::NotImplemented("invalid builder Kind");
+ }
+ }
+ /// Recursively finish `builder` into an Array; number and string columns reference `scalar_values`
+ Status Finish(const std::shared_ptr<Array>& scalar_values, BuilderPtr builder,
+ std::shared_ptr<Array>* out) {
+ auto finish_children = [this, &scalar_values](BuilderPtr child,
+ std::shared_ptr<Array>* out) {
+ return Finish(scalar_values, child, out);
+ };
+ switch (builder.kind) {
+ case Kind::kNull: {
+ auto length = static_cast<int64_t>(builder.index);
+ *out = std::make_shared<NullArray>(length);
+ return Status::OK();
+ }
+ case Kind::kBoolean:
+ return Cast<Kind::kBoolean>(builder)->Finish(out);
+
+ case Kind::kNumber:
+ return FinishScalar(scalar_values, Cast<Kind::kNumber>(builder), out);
+
+ case Kind::kString:
+ return FinishScalar(scalar_values, Cast<Kind::kString>(builder), out);
+
+ case Kind::kArray:
+ return Cast<Kind::kArray>(builder)->Finish(std::move(finish_children), out);
+
+ case Kind::kObject:
+ return Cast<Kind::kObject>(builder)->Finish(std::move(finish_children), out);
+
+ default:
+ return Status::NotImplemented("invalid builder kind");
+ }
+ }
+
+ private:
+ /// Finish a string or number column as a DictionaryArray of int32 indices into scalar_values
+ Status FinishScalar(const std::shared_ptr<Array>& scalar_values, ScalarBuilder* builder,
+ std::shared_ptr<Array>* out) {
+ std::shared_ptr<Array> indices;
+ // TODO(bkietz) embed builder->values_length() in this output somehow
+ RETURN_NOT_OK(builder->Finish(&indices));
+ auto ty = dictionary(int32(), scalar_values->type());
+ *out = std::make_shared<DictionaryArray>(ty, indices, scalar_values);
+ return Status::OK();
+ }
+ /// Arena for `kind`, selected by using the numeric value of `kind` as the tuple index
+ template <Kind::type kind>
+ std::vector<RawArrayBuilder<kind>>& arena() {
+ return std::get<static_cast<std::size_t>(kind)>(arenas_);
+ }
+ // Slot 0 is an empty tuple (presumably Kind::kNull == 0): null builders need no arena
+ MemoryPool* pool_;
+ std::tuple<std::tuple<>, std::vector<RawArrayBuilder<Kind::kBoolean>>,
+ std::vector<RawArrayBuilder<Kind::kNumber>>,
+ std::vector<RawArrayBuilder<Kind::kString>>,
+ std::vector<RawArrayBuilder<Kind::kArray>>,
+ std::vector<RawArrayBuilder<Kind::kObject>>>
+ arenas_;
+};
+
+/// Three implementations are provided for BlockParser, one for each
+/// UnexpectedFieldBehavior. However most of the logic is identical in each
+/// case, so the majority of the implementation is in this base class
+class HandlerBase : public BlockParser,
+ public rj::BaseReaderHandler<rj::UTF8<>, HandlerBase> {
+ public:
+ explicit HandlerBase(MemoryPool* pool)
+ : BlockParser(pool),
+ builder_set_(pool),
+ field_index_(-1),
+ scalar_values_builder_(pool) {}
+
+ /// Retrieve a pointer to a builder from a BuilderPtr
+ template <Kind::type kind>
+ enable_if_t<kind != Kind::kNull, RawArrayBuilder<kind>*> Cast(BuilderPtr builder) {
+ return builder_set_.Cast<kind>(builder);
+ }
+
+ /// Accessor for a stored error Status
+ Status Error() { return status_; }
+
+ /// \defgroup rapidjson-handler-interface functions expected by rj::Reader
+ ///
+ /// bool Key(const char* data, rj::SizeType size, ...) is omitted since
+ /// the behavior varies greatly between UnexpectedFieldBehaviors
+ ///
+ /// @{
+ bool Null() {
+ status_ = builder_set_.AppendNull(builder_stack_.back(), field_index_, builder_);
+ return status_.ok();
+ }
+
+ bool Bool(bool value) {
+ constexpr auto kind = Kind::kBoolean;
+ if (ARROW_PREDICT_FALSE(builder_.kind != kind)) {
+ status_ = IllegallyChangedTo(kind);
+ return status_.ok();
+ }
+ status_ = Cast<kind>(builder_)->Append(value);
+ return status_.ok();
+ }
+
+ bool RawNumber(const char* data, rj::SizeType size, ...) {
+ status_ = AppendScalar<Kind::kNumber>(builder_, string_view(data, size));
+ return status_.ok();
+ }
+
+ bool String(const char* data, rj::SizeType size, ...) {
+ status_ = AppendScalar<Kind::kString>(builder_, string_view(data, size));
+ return status_.ok();
+ }
+
+ bool StartObject() {
+ status_ = StartObjectImpl();
+ return status_.ok();
+ }
+
+ bool EndObject(...) {
+ status_ = EndObjectImpl();
+ return status_.ok();
+ }
+
+ bool StartArray() {
+ status_ = StartArrayImpl();
+ return status_.ok();
+ }
+
+ bool EndArray(rj::SizeType size) {
+ status_ = EndArrayImpl(size);
+ return status_.ok();
+ }
+ /// @}
+
+ /// \brief Set up builders using an expected Schema
+ Status Initialize(const std::shared_ptr<Schema>& s) {
+ auto type = struct_({});
+ if (s) {
+ type = struct_(s->fields());
+ }
+ return builder_set_.MakeBuilder(*type, 0, &builder_);
+ }
+
+ Status Finish(std::shared_ptr<Array>* parsed) override {
+ std::shared_ptr<Array> scalar_values;
+ RETURN_NOT_OK(scalar_values_builder_.Finish(&scalar_values));
+ return builder_set_.Finish(scalar_values, builder_, parsed);
+ }
+
+ /// \brief Emit path of current field for debugging purposes
+ std::string Path() {
+ std::string path;
+ for (size_t i = 0; i < builder_stack_.size(); ++i) {
+ auto builder = builder_stack_[i];
+ if (builder.kind == Kind::kArray) {
+ path += "/[]";
+ } else {
+ auto struct_builder = Cast<Kind::kObject>(builder);
+ auto field_index = field_index_;
+ if (i + 1 < field_index_stack_.size()) {
+ field_index = field_index_stack_[i + 1];
+ }
+ path += "/" + struct_builder->FieldName(field_index);
+ }
+ }
+ return path;
+ }
+
+ protected:
+ template <typename Handler, typename Stream>
+ Status DoParse(Handler& handler, Stream&& json) {
+ constexpr auto parse_flags = rj::kParseIterativeFlag | rj::kParseNanAndInfFlag |
+ rj::kParseStopWhenDoneFlag |
+ rj::kParseNumbersAsStringsFlag;
+
+ rj::Reader reader;
+
+ for (; num_rows_ < kMaxParserNumRows; ++num_rows_) {
+ auto ok = reader.Parse<parse_flags>(json, handler);
+ switch (ok.Code()) {
+ case rj::kParseErrorNone:
+ // parse the next object
+ continue;
+ case rj::kParseErrorDocumentEmpty:
+ // parsed all objects, finish
+ return Status::OK();
+ case rj::kParseErrorTermination:
+ // handler emitted an error
+ return handler.Error();
+ default:
+ // rj emitted an error
+ return ParseError(rj::GetParseError_En(ok.Code()), " in row ", num_rows_);
+ }
+ }
+ return Status::Invalid("Exceeded maximum rows");
+ }
+
+ template <typename Handler>
+ Status DoParse(Handler& handler, const std::shared_ptr<Buffer>& json) {
+ RETURN_NOT_OK(ReserveScalarStorage(json->size()));
+ rj::MemoryStream ms(reinterpret_cast<const char*>(json->data()), json->size());
+ using InputStream = rj::EncodedInputStream<rj::UTF8<>, rj::MemoryStream>;
+ return DoParse(handler, InputStream(ms));
+ }
+
+ /// \defgroup handlerbase-append-methods append non-nested values
+ ///
+ /// @{
+
+ template <Kind::type kind>
+ Status AppendScalar(BuilderPtr builder, string_view scalar) {
+ if (ARROW_PREDICT_FALSE(builder.kind != kind)) {
+ return IllegallyChangedTo(kind);
+ }
+ auto index = static_cast<int32_t>(scalar_values_builder_.length());
+ auto value_length = static_cast<int32_t>(scalar.size());
+ RETURN_NOT_OK(Cast<kind>(builder)->Append(index, value_length));
+ RETURN_NOT_OK(scalar_values_builder_.Reserve(1));
+ scalar_values_builder_.UnsafeAppend(scalar);
+ return Status::OK();
+ }
+
+ /// @}
+
+ Status StartObjectImpl() {
+ constexpr auto kind = Kind::kObject;
+ if (ARROW_PREDICT_FALSE(builder_.kind != kind)) {
+ return IllegallyChangedTo(kind);
+ }
+ auto struct_builder = Cast<kind>(builder_);
+ absent_fields_stack_.Push(struct_builder->num_fields(), true);
+ StartNested();
+ return struct_builder->Append();
+ }
+
+ /// \brief helper for Key() functions
+ ///
+ /// sets the field builder with name key, or returns false if
+ /// there is no field with that name
+ bool SetFieldBuilder(string_view key, bool* duplicate_keys) {
+ auto parent = Cast<Kind::kObject>(builder_stack_.back());
+ field_index_ = parent->GetFieldIndex(std::string(key));
+ if (ARROW_PREDICT_FALSE(field_index_ == -1)) {
+ return false;
+ }
+ *duplicate_keys = !absent_fields_stack_[field_index_];
+ if (*duplicate_keys) {
+ status_ = ParseError("Column(", Path(), ") was specified twice in row ", num_rows_);
+ return false;
+ }
+ builder_ = parent->field_builder(field_index_);
+ absent_fields_stack_[field_index_] = false;
+ return true;
+ }
+
+ Status EndObjectImpl() {
+ auto parent = builder_stack_.back();
+
+ auto expected_count = absent_fields_stack_.TopSize();
+ for (int i = 0; i < expected_count; ++i) {
+ if (!absent_fields_stack_[i]) {
+ continue;
+ }
+ auto field_builder = Cast<Kind::kObject>(parent)->field_builder(i);
+ if (ARROW_PREDICT_FALSE(!field_builder.nullable)) {
+ return ParseError("a required field was absent");
+ }
+ RETURN_NOT_OK(builder_set_.AppendNull(parent, i, field_builder));
+ }
+ absent_fields_stack_.Pop();
+ EndNested();
+ return Status::OK();
+ }
+
+ Status StartArrayImpl() {
+ constexpr auto kind = Kind::kArray;
+ if (ARROW_PREDICT_FALSE(builder_.kind != kind)) {
+ return IllegallyChangedTo(kind);
+ }
+ StartNested();
+ // append to the list builder in EndArrayImpl
+ builder_ = Cast<kind>(builder_)->value_builder();
+ return Status::OK();
+ }
+
+ Status EndArrayImpl(rj::SizeType size) {
+ EndNested();
+ // append to list_builder here
+ auto list_builder = Cast<Kind::kArray>(builder_);
+ return list_builder->Append(size);
+ }
+
+ /// helper method for StartArray and StartObject
+ /// adds the current builder to a stack so its
+ /// children can be visited and parsed.
+ void StartNested() {
+ field_index_stack_.push_back(field_index_);
+ field_index_ = -1;
+ builder_stack_.push_back(builder_);
+ }
+
+ /// Shared step of EndArray and EndObject: pops the saved parent builder
+ /// and its field index back into builder_ and field_index_ so that
+ /// parsing of the parent can continue.
+ void EndNested() {
+   builder_ = builder_stack_.back();
+   builder_stack_.pop_back();
+   field_index_ = field_index_stack_.back();
+   field_index_stack_.pop_back();
+ }
+
+ Status IllegallyChangedTo(Kind::type illegally_changed_to) {
+ return ParseError("Column(", Path(), ") changed from ", Kind::Name(builder_.kind),
+ " to ", Kind::Name(illegally_changed_to), " in row ", num_rows_);
+ }
+
+ /// Make sure scalar_values_builder_ can hold \p size more bytes of value
+ /// data; scalars can occupy almost all of the JSON buffer. Only the
+ /// shortfall beyond the capacity that is already free is reserved.
+ Status ReserveScalarStorage(int64_t size) override {
+   const auto spare = scalar_values_builder_.value_data_capacity() -
+                      scalar_values_builder_.value_data_length();
+   if (size > spare) {
+     return scalar_values_builder_.ReserveData(size - spare);
+   }
+   return Status::OK();
+ }
+
+ Status status_;
+ RawBuilderSet builder_set_;
+ BuilderPtr builder_;
+ // top of this stack is the parent of builder_
+ std::vector<BuilderPtr> builder_stack_;
+ // top of this stack refers to the fields of the highest *StructBuilder*
+ // in builder_stack_ (list builders don't have absent fields)
+ BitsetStack absent_fields_stack_;
+ // index of builder_ within its parent
+ int field_index_;
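+ // top of this stack is the index of builder_stack_.back() within its own
+ // parent; StartNested saves field_index_ here and EndNested restores it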
+ std::vector<int> field_index_stack_;
+ StringBuilder scalar_values_builder_;
+};
+
+template <UnexpectedFieldBehavior>
+class Handler;
+
+template <>
+class Handler<UnexpectedFieldBehavior::Error> : public HandlerBase {
+ public:
+ using HandlerBase::HandlerBase;
+
+ Status Parse(const std::shared_ptr<Buffer>& json) override {
+ return DoParse(*this, json);
+ }
+
+ /// \ingroup rapidjson-handler-interface
+ ///
+ /// if an unexpected field is encountered, emit a parse error and bail
+ bool Key(const char* key, rj::SizeType len, ...) {
+ bool duplicate_keys = false;
+ if (ARROW_PREDICT_FALSE(SetFieldBuilder(string_view(key, len), &duplicate_keys))) {
+ return true;
+ }
+ if (!duplicate_keys) {
+ status_ = ParseError("unexpected field");
+ }
+ return false;
+ }
+};
+
+ /// Handler created by BlockParser::Make for UnexpectedFieldBehavior::Ignore.
+ /// When Key() meets a key which SetFieldBuilder rejects (and which is not a
+ /// duplicate), every following event is swallowed, i.e. not forwarded to
+ /// HandlerBase, until the next Key or EndObject at the same object depth.
+ template <>
+ class Handler<UnexpectedFieldBehavior::Ignore> : public HandlerBase {
+ public:
+ using HandlerBase::HandlerBase;
+
+ Status Parse(const std::shared_ptr<Buffer>& json) override {
+ return DoParse(*this, json);
+ }
+
+ // The scalar callbacks below all follow one pattern: report success without
+ // touching any builder while skipping, otherwise defer to HandlerBase.
+ bool Null() {
+ if (Skipping()) {
+ return true;
+ }
+ return HandlerBase::Null();
+ }
+
+ bool Bool(bool value) {
+ if (Skipping()) {
+ return true;
+ }
+ return HandlerBase::Bool(value);
+ }
+
+ bool RawNumber(const char* data, rj::SizeType size, ...) {
+ if (Skipping()) {
+ return true;
+ }
+ return HandlerBase::RawNumber(data, size);
+ }
+
+ bool String(const char* data, rj::SizeType size, ...) {
+ if (Skipping()) {
+ return true;
+ }
+ return HandlerBase::String(data, size);
+ }
+
+ bool StartObject() {
+ // depth_ is incremented even while skipping so that the matching
+ // EndObject brings it back to the same value
+ ++depth_;
+ if (Skipping()) {
+ return true;
+ }
+ return HandlerBase::StartObject();
+ }
+
+ /// \ingroup rapidjson-handler-interface
+ ///
+ /// if an unexpected field is encountered, skip until its value has been consumed
+ bool Key(const char* key, rj::SizeType len, ...) {
+ // a key at the depth where skipping began means the ignored value has ended
+ MaybeStopSkipping();
+ if (Skipping()) {
+ return true;
+ }
+ bool duplicate_keys = false;
+ if (ARROW_PREDICT_TRUE(SetFieldBuilder(string_view(key, len), &duplicate_keys))) {
+ return true;
+ }
+ // duplicated key: stop parsing (SetFieldBuilder stored the error in status_)
+ if (ARROW_PREDICT_FALSE(duplicate_keys)) {
+ return false;
+ }
+ // unexpected key: start skipping at the current object depth
+ skip_depth_ = depth_;
+ return true;
+ }
+
+ bool EndObject(...) {
+ // must run before depth_ is decremented: the end of the object which holds
+ // the ignored key also ends the skip
+ MaybeStopSkipping();
+ --depth_;
+ if (Skipping()) {
+ return true;
+ }
+ return HandlerBase::EndObject();
+ }
+
+ // arrays do not change depth_; they are skipped or forwarded as a whole
+ bool StartArray() {
+ if (Skipping()) {
+ return true;
+ }
+ return HandlerBase::StartArray();
+ }
+
+ bool EndArray(rj::SizeType size) {
+ if (Skipping()) {
+ return true;
+ }
+ return HandlerBase::EndArray(size);
+ }
+
+ private:
+ // true while events belong to the value of an ignored key
+ bool Skipping() { return depth_ >= skip_depth_; }
+
+ // leave skipping mode if the current depth is the one at which it was entered
+ void MaybeStopSkipping() {
+ if (skip_depth_ == depth_) {
+ skip_depth_ = std::numeric_limits<int>::max();
+ }
+ }
+
+ // number of StartObject calls which have not yet been matched by EndObject
+ int depth_ = 0;
+ // depth_ at which skipping began; the int maximum means "not skipping"
+ int skip_depth_ = std::numeric_limits<int>::max();
+ };
+
+ /// Handler created by BlockParser::Make for UnexpectedFieldBehavior::InferType.
+ /// Unknown keys get a new null-kind builder added to the parent struct
+ /// builder; a builder of kind kNull is replaced by a builder of the matching
+ /// kind when the first bool, number, string, object or array arrives for it.
+ /// Null() is not overridden here.
+ template <>
+ class Handler<UnexpectedFieldBehavior::InferType> : public HandlerBase {
+ public:
+ using HandlerBase::HandlerBase;
+
+ Status Parse(const std::shared_ptr<Buffer>& json) override {
+ return DoParse(*this, json);
+ }
+
+ // Each value callback first tries the promotion; a true result from
+ // MaybePromoteFromNull signals an error (stored in status_) and stops parsing.
+ bool Bool(bool value) {
+ if (ARROW_PREDICT_FALSE(MaybePromoteFromNull<Kind::kBoolean>())) {
+ return false;
+ }
+ return HandlerBase::Bool(value);
+ }
+
+ bool RawNumber(const char* data, rj::SizeType size, ...) {
+ if (ARROW_PREDICT_FALSE(MaybePromoteFromNull<Kind::kNumber>())) {
+ return false;
+ }
+ return HandlerBase::RawNumber(data, size);
+ }
+
+ bool String(const char* data, rj::SizeType size, ...) {
+ if (ARROW_PREDICT_FALSE(MaybePromoteFromNull<Kind::kString>())) {
+ return false;
+ }
+ return HandlerBase::String(data, size);
+ }
+
+ bool StartObject() {
+ if (ARROW_PREDICT_FALSE(MaybePromoteFromNull<Kind::kObject>())) {
+ return false;
+ }
+ return HandlerBase::StartObject();
+ }
+
+ /// \ingroup rapidjson-handler-interface
+ ///
+ /// If an unexpected field is encountered, add a new builder to
+ /// the current parent builder. It is added as a NullBuilder with
+ /// (parent.length - 1) leading nulls. The next value parsed
+ /// will probably trigger promotion of this field from null
+ bool Key(const char* key, rj::SizeType len, ...) {
+ bool duplicate_keys = false;
+ if (ARROW_PREDICT_TRUE(SetFieldBuilder(string_view(key, len), &duplicate_keys))) {
+ return true;
+ }
+ // duplicated key: stop parsing (SetFieldBuilder stored the error in status_)
+ if (ARROW_PREDICT_FALSE(duplicate_keys)) {
+ return false;
+ }
+ auto struct_builder = Cast<Kind::kObject>(builder_stack_.back());
+ auto leading_nulls = static_cast<uint32_t>(struct_builder->length() - 1);
+ // a BuilderPtr of kind kNull carries its leading null count directly;
+ // the final argument marks the new field as nullable
+ builder_ = BuilderPtr(Kind::kNull, leading_nulls, true);
+ field_index_ = struct_builder->AddField(std::string(key, len), builder_);
+ return true;
+ }
+
+ bool StartArray() {
+ if (ARROW_PREDICT_FALSE(MaybePromoteFromNull<Kind::kArray>())) {
+ return false;
+ }
+ return HandlerBase::StartArray();
+ }
+
+ private:
+ // return true if a terminal error was encountered
+ //
+ // Does nothing unless builder_ has kind kNull. Otherwise a builder of the
+ // requested kind is made from builder_.index (for a null builder this is
+ // the leading null count, see Key) and installed in the parent, which is
+ // either a list builder or a struct builder.
+ template <Kind::type kind>
+ bool MaybePromoteFromNull() {
+ if (ARROW_PREDICT_TRUE(builder_.kind != Kind::kNull)) {
+ return false;
+ }
+ auto parent = builder_stack_.back();
+ if (parent.kind == Kind::kArray) {
+ auto list_builder = Cast<Kind::kArray>(parent);
+ DCHECK_EQ(list_builder->value_builder(), builder_);
+ status_ = builder_set_.MakeBuilder<kind>(builder_.index, &builder_);
+ if (ARROW_PREDICT_FALSE(!status_.ok())) {
+ return true;
+ }
+ // NOTE(review): the parent is cast again after MakeBuilder, presumably
+ // because making a builder can invalidate the pointer obtained above;
+ // confirm against RawBuilderSet
+ list_builder = Cast<Kind::kArray>(parent);
+ list_builder->value_builder(builder_);
+ } else {
+ auto struct_builder = Cast<Kind::kObject>(parent);
+ DCHECK_EQ(struct_builder->field_builder(field_index_), builder_);
+ status_ = builder_set_.MakeBuilder<kind>(builder_.index, &builder_);
+ if (ARROW_PREDICT_FALSE(!status_.ok())) {
+ return true;
+ }
+ // same re-cast as in the list branch above
+ struct_builder = Cast<Kind::kObject>(parent);
+ struct_builder->field_builder(field_index_, builder_);
+ }
+ return false;
+ }
+ };
+
+ /// Create the Handler specialization which matches
+ /// options.unexpected_field_behavior and initialize it with
+ /// options.explicit_schema.
+ Status BlockParser::Make(MemoryPool* pool, const ParseOptions& options,
+                          std::unique_ptr<BlockParser>* out) {
+   using Behavior = UnexpectedFieldBehavior;
+
+   // an explicit schema may only be omitted when types are inferred
+   DCHECK(options.unexpected_field_behavior == Behavior::InferType ||
+          options.explicit_schema != nullptr);
+
+   switch (options.unexpected_field_behavior) {
+     case Behavior::Ignore:
+       *out = make_unique<Handler<Behavior::Ignore>>(pool);
+       break;
+     case Behavior::Error:
+       *out = make_unique<Handler<Behavior::Error>>(pool);
+       break;
+     case Behavior::InferType:
+       *out = make_unique<Handler<Behavior::InferType>>(pool);
+       break;
+   }
+
+   auto& handler = static_cast<HandlerBase&>(**out);
+   return handler.Initialize(options.explicit_schema);
+ }
+
+ /// Overload of Make which allocates from default_memory_pool().
+ Status BlockParser::Make(const ParseOptions& options, std::unique_ptr<BlockParser>* out) {
+   MemoryPool* const pool = default_memory_pool();
+   return BlockParser::Make(pool, options, out);
+ }
+
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/parser.h b/contrib/libs/apache/arrow/cpp/src/arrow/json/parser.h
new file mode 100644
index 0000000000..4dd14e4b80
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/parser.h
@@ -0,0 +1,101 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "arrow/json/options.h"
+#include "arrow/status.h"
+#include "arrow/util/key_value_metadata.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+class Buffer;
+class MemoryPool;
+class KeyValueMetadata;
+class ResizableBuffer;
+
+namespace json {
+
+ /// Holder for the enumeration of JSON value kinds (null, boolean, number,
+ /// string, array, object) and for static helpers keyed on a kind. Only the
+ /// declarations are visible here; the definitions live in another file.
+ struct Kind {
+ enum type : uint8_t { kNull, kBoolean, kNumber, kString, kArray, kObject };
+
+ /// Name of a kind as a string
+ static const std::string& Name(Kind::type);
+
+ // NOTE(review): Tag and FromTag presumably convert between a kind and the
+ // key/value metadata attached to a field - confirm against the definitions
+ static const std::shared_ptr<const KeyValueMetadata>& Tag(Kind::type);
+
+ static Kind::type FromTag(const std::shared_ptr<const KeyValueMetadata>& tag);
+
+ // NOTE(review): presumably stores the kind which corresponds to `type` in
+ // *kind and reports unsupported types through the Status - confirm
+ static Status ForType(const DataType& type, Kind::type* kind);
+ };
+
+constexpr int32_t kMaxParserNumRows = 100000;
+
+ /// \class BlockParser
+ /// \brief A reusable block-based parser for JSON data
+ ///
+ /// The parser takes a block of newline delimited JSON data and extracts Arrays
+ /// of unconverted strings which can be fed to a Converter to obtain a usable Array.
+ ///
+ /// Note that in addition to parse errors (such as malformed JSON) some conversion
+ /// errors are caught at parse time:
+ /// - A null value in non-nullable column
+ /// - Change in the JSON kind of a column. For example, if an explicit schema is provided
+ /// which stipulates that field "a" is integral, a row of {"a": "not a number"} will
+ /// result in an error. This also applies to fields outside an explicit schema.
+ ///
+ /// Instances cannot be copied and are obtained through the static Make functions.
+ class ARROW_EXPORT BlockParser {
+ public:
+ virtual ~BlockParser() = default;
+
+ /// \brief Reserve storage for scalars parsed from a block of json
+ /// \param[in] nbytes number of bytes of scalar data to make room for
+ virtual Status ReserveScalarStorage(int64_t nbytes) = 0;
+
+ /// \brief Parse a block of data
+ /// \param[in] json buffer holding the block to parse
+ virtual Status Parse(const std::shared_ptr<Buffer>& json) = 0;
+
+ /// \brief Extract parsed data
+ /// \param[out] parsed receives the array of parsed data
+ virtual Status Finish(std::shared_ptr<Array>* parsed) = 0;
+
+ /// \brief Return the number of parsed rows
+ int32_t num_rows() const { return num_rows_; }
+
+ /// \brief Construct a BlockParser
+ ///
+ /// \param[in] pool MemoryPool to use when constructing parsed array
+ /// \param[in] options ParseOptions to use when parsing JSON
+ /// \param[out] out constructed BlockParser
+ static Status Make(MemoryPool* pool, const ParseOptions& options,
+ std::unique_ptr<BlockParser>* out);
+
+ /// \brief Construct a BlockParser without naming a MemoryPool
+ ///
+ /// \param[in] options ParseOptions to use when parsing JSON
+ /// \param[out] out constructed BlockParser
+ static Status Make(const ParseOptions& options, std::unique_ptr<BlockParser>* out);
+
+ protected:
+ ARROW_DISALLOW_COPY_AND_ASSIGN(BlockParser);
+
+ explicit BlockParser(MemoryPool* pool) : pool_(pool) {}
+
+ // pool passed to the constructor; not owned by the parser
+ MemoryPool* pool_;
+ // row counter reported by num_rows(); starts at zero
+ int32_t num_rows_ = 0;
+ };
+
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/rapidjson_defs.h b/contrib/libs/apache/arrow/cpp/src/arrow/json/rapidjson_defs.h
new file mode 100644
index 0000000000..9ed81d000c
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/rapidjson_defs.h
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Include this file before including any RapidJSON headers.
+
+#pragma once
+
+#define RAPIDJSON_HAS_STDSTRING 1
+#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1
+#define RAPIDJSON_HAS_CXX11_RANGE_FOR 1
+
+// rapidjson will be defined in namespace arrow::rapidjson
+#define RAPIDJSON_NAMESPACE arrow::rapidjson
+#define RAPIDJSON_NAMESPACE_BEGIN \
+ namespace arrow { \
+ namespace rapidjson {
+#define RAPIDJSON_NAMESPACE_END \
+ } \
+ }
+
+// enable SIMD whitespace skipping, if available
+#if defined(ARROW_HAVE_SSE4_2)
+#define RAPIDJSON_SSE2 1
+#define RAPIDJSON_SSE42 1
+#endif
+
+#if defined(ARROW_HAVE_NEON)
+#define RAPIDJSON_NEON 1
+#endif
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/reader.cc b/contrib/libs/apache/arrow/cpp/src/arrow/json/reader.cc
new file mode 100644
index 0000000000..51c77fa4df
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/reader.cc
@@ -0,0 +1,227 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/json/reader.h"
+
+#include <utility>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/buffer.h"
+#include "arrow/io/interfaces.h"
+#include "arrow/json/chunked_builder.h"
+#include "arrow/json/chunker.h"
+#include "arrow/json/converter.h"
+#include "arrow/json/parser.h"
+#include "arrow/record_batch.h"
+#include "arrow/table.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/iterator.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/task_group.h"
+#include "arrow/util/thread_pool.h"
+
+namespace arrow {
+
+using util::string_view;
+
+using internal::checked_cast;
+using internal::GetCpuThreadPool;
+using internal::TaskGroup;
+using internal::ThreadPool;
+
+namespace json {
+
+ /// TableReader implementation. Read() pulls blocks from the input stream,
+ /// uses the chunker to cut them at object boundaries, and submits one parse
+ /// task per block to the task group; each task parses its block and inserts
+ /// the result into a shared ChunkedArrayBuilder under its block index.
+ class TableReaderImpl : public TableReader,
+ public std::enable_shared_from_this<TableReaderImpl> {
+ public:
+ TableReaderImpl(MemoryPool* pool, const ReadOptions& read_options,
+ const ParseOptions& parse_options,
+ std::shared_ptr<TaskGroup> task_group)
+ : pool_(pool),
+ read_options_(read_options),
+ parse_options_(parse_options),
+ chunker_(MakeChunker(parse_options_)),
+ task_group_(std::move(task_group)) {}
+
+ /// Wrap \p input in an iterator which yields blocks of
+ /// read_options_.block_size bytes, with readahead sized by the task
+ /// group's parallelism.
+ Status Init(std::shared_ptr<io::InputStream> input) {
+ ARROW_ASSIGN_OR_RAISE(auto it,
+ io::MakeInputStreamIterator(input, read_options_.block_size));
+ return MakeReadaheadIterator(std::move(it), task_group_->parallelism())
+ .Value(&block_iterator_);
+ }
+
+ /// Read all blocks and assemble the table. Returns Status::Invalid when
+ /// the input yields no block at all.
+ Result<std::shared_ptr<Table>> Read() override {
+ RETURN_NOT_OK(MakeBuilder());
+
+ ARROW_ASSIGN_OR_RAISE(auto block, block_iterator_.Next());
+ if (block == nullptr) {
+ return Status::Invalid("Empty JSON file");
+ }
+
+ // the parse tasks capture a shared_ptr to this reader
+ auto self = shared_from_this();
+ auto empty = std::make_shared<Buffer>("");
+
+ int64_t block_index = 0;
+ // trailing incomplete object of the previous block; empty for the first block
+ std::shared_ptr<Buffer> partial = empty;
+
+ while (block != nullptr) {
+ std::shared_ptr<Buffer> next_block, whole, completion, next_partial;
+
+ // fetch one block ahead so that the last block can be recognized
+ ARROW_ASSIGN_OR_RAISE(next_block, block_iterator_.Next());
+
+ if (next_block == nullptr) {
+ // End of file reached => compute completion from penultimate block
+ RETURN_NOT_OK(chunker_->ProcessFinal(partial, block, &completion, &whole));
+ } else {
+ std::shared_ptr<Buffer> starts_with_whole;
+ // Get completion of partial from previous block.
+ RETURN_NOT_OK(chunker_->ProcessWithPartial(partial, block, &completion,
+ &starts_with_whole));
+
+ // Get all whole objects entirely inside the current buffer
+ RETURN_NOT_OK(chunker_->Process(starts_with_whole, &whole, &next_partial));
+ }
+
+ // Launch parse task
+ task_group_->Append([self, partial, completion, whole, block_index] {
+ return self->ParseAndInsert(partial, completion, whole, block_index);
+ });
+ block_index++;
+
+ // next_partial stays null after the final block, where the loop ends anyway
+ partial = next_partial;
+ block = next_block;
+ }
+
+ std::shared_ptr<ChunkedArray> array;
+ RETURN_NOT_OK(builder_->Finish(&array));
+ return Table::FromChunkedStructArray(array);
+ }
+
+ private:
+ /// Create builder_ for a struct type made of the explicit schema's fields
+ /// (or an empty struct without a schema); a promotion graph is passed
+ /// only when unexpected fields have their type inferred.
+ Status MakeBuilder() {
+ auto type = parse_options_.explicit_schema
+ ? struct_(parse_options_.explicit_schema->fields())
+ : struct_({});
+
+ auto promotion_graph =
+ parse_options_.unexpected_field_behavior == UnexpectedFieldBehavior::InferType
+ ? GetPromotionGraph()
+ : nullptr;
+
+ return MakeChunkedArrayBuilder(task_group_, pool_, promotion_graph, type, &builder_);
+ }
+
+ /// Body of one parse task: parse the object straddling the previous block
+ /// boundary (\p partial followed by \p completion) and then \p whole with
+ /// a fresh BlockParser, and insert the result at \p block_index.
+ Status ParseAndInsert(const std::shared_ptr<Buffer>& partial,
+ const std::shared_ptr<Buffer>& completion,
+ const std::shared_ptr<Buffer>& whole, int64_t block_index) {
+ std::unique_ptr<BlockParser> parser;
+ RETURN_NOT_OK(BlockParser::Make(pool_, parse_options_, &parser));
+ RETURN_NOT_OK(parser->ReserveScalarStorage(partial->size() + completion->size() +
+ whole->size()));
+
+ if (partial->size() != 0 || completion->size() != 0) {
+ std::shared_ptr<Buffer> straddling;
+ // buffers are concatenated only when both pieces are non-empty
+ if (partial->size() == 0) {
+ straddling = completion;
+ } else if (completion->size() == 0) {
+ straddling = partial;
+ } else {
+ ARROW_ASSIGN_OR_RAISE(straddling,
+ ConcatenateBuffers({partial, completion}, pool_));
+ }
+ RETURN_NOT_OK(parser->Parse(straddling));
+ }
+
+ if (whole->size() != 0) {
+ RETURN_NOT_OK(parser->Parse(whole));
+ }
+
+ std::shared_ptr<Array> parsed;
+ RETURN_NOT_OK(parser->Finish(&parsed));
+ builder_->Insert(block_index, field("", parsed->type()), parsed);
+ return Status::OK();
+ }
+
+ MemoryPool* pool_;
+ ReadOptions read_options_;
+ ParseOptions parse_options_;
+ std::unique_ptr<Chunker> chunker_;
+ std::shared_ptr<TaskGroup> task_group_;
+ // set by Init()
+ Iterator<std::shared_ptr<Buffer>> block_iterator_;
+ // set by MakeBuilder() at the start of Read()
+ std::shared_ptr<ChunkedArrayBuilder> builder_;
+ };
+
+ /// Deprecated adapter: runs the Result-returning Read() and unpacks its
+ /// value into \p out.
+ Status TableReader::Read(std::shared_ptr<Table>* out) {
+   return this->Read().Value(out);
+ }
+
+ /// Create and initialize a TableReaderImpl. With read_options.use_threads
+ /// the parse tasks run on the CPU thread pool, otherwise serially.
+ Result<std::shared_ptr<TableReader>> TableReader::Make(
+     MemoryPool* pool, std::shared_ptr<io::InputStream> input,
+     const ReadOptions& read_options, const ParseOptions& parse_options) {
+   std::shared_ptr<TaskGroup> task_group = read_options.use_threads
+                                               ? TaskGroup::MakeThreaded(GetCpuThreadPool())
+                                               : TaskGroup::MakeSerial();
+   auto reader = std::make_shared<TableReaderImpl>(pool, read_options, parse_options,
+                                                   std::move(task_group));
+   RETURN_NOT_OK(reader->Init(input));
+   return reader;
+ }
+
+ /// Deprecated adapter: calls the Result-returning Make() and unpacks the
+ /// reader into \p out.
+ Status TableReader::Make(MemoryPool* pool, std::shared_ptr<io::InputStream> input,
+                          const ReadOptions& read_options,
+                          const ParseOptions& parse_options,
+                          std::shared_ptr<TableReader>* out) {
+   auto maybe_reader = TableReader::Make(pool, input, read_options, parse_options);
+   return std::move(maybe_reader).Value(out);
+ }
+
+ /// Parse a single buffer of JSON with \p options and convert the result
+ /// into a RecordBatch, using a serial task group and the default memory pool.
+ Result<std::shared_ptr<RecordBatch>> ParseOne(ParseOptions options,
+                                               std::shared_ptr<Buffer> json) {
+   // parse the buffer
+   std::unique_ptr<BlockParser> block_parser;
+   RETURN_NOT_OK(BlockParser::Make(options, &block_parser));
+   RETURN_NOT_OK(block_parser->Parse(json));
+   std::shared_ptr<Array> unconverted;
+   RETURN_NOT_OK(block_parser->Finish(&unconverted));
+
+   // set up the builder which converts the parsed array
+   auto struct_type =
+       options.explicit_schema ? struct_(options.explicit_schema->fields()) : struct_({});
+   const bool infer_types =
+       options.unexpected_field_behavior == UnexpectedFieldBehavior::InferType;
+   auto promotion_graph = infer_types ? GetPromotionGraph() : nullptr;
+   std::shared_ptr<ChunkedArrayBuilder> chunked_builder;
+   RETURN_NOT_OK(MakeChunkedArrayBuilder(TaskGroup::MakeSerial(), default_memory_pool(),
+                                         promotion_graph, struct_type, &chunked_builder));
+
+   // convert the single parsed chunk
+   chunked_builder->Insert(0, field("", struct_type), unconverted);
+   std::shared_ptr<ChunkedArray> converted_chunked;
+   RETURN_NOT_OK(chunked_builder->Finish(&converted_chunked));
+   const auto& converted = checked_cast<const StructArray&>(*converted_chunked->chunk(0));
+
+   // the fields of the struct array become the columns of the batch
+   const int num_columns = converted.num_fields();
+   std::vector<std::shared_ptr<Array>> columns;
+   columns.reserve(num_columns);
+   for (int i = 0; i < num_columns; ++i) {
+     columns.push_back(converted.field(i));
+   }
+   return RecordBatch::Make(schema(converted.type()->fields()), converted.length(),
+                            std::move(columns));
+ }
+
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/reader.h b/contrib/libs/apache/arrow/cpp/src/arrow/json/reader.h
new file mode 100644
index 0000000000..c40338c1e1
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/reader.h
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "arrow/json/options.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Buffer;
+class MemoryPool;
+class Table;
+class RecordBatch;
+class Array;
+class DataType;
+
+namespace io {
+class InputStream;
+} // namespace io
+
+namespace json {
+
+ /// A class that reads an entire JSON file into an Arrow Table
+ ///
+ /// The file is expected to consist of individual line-separated JSON objects
+ class ARROW_EXPORT TableReader {
+ public:
+ virtual ~TableReader() = default;
+
+ /// Read the entire JSON file and convert it to an Arrow Table
+ virtual Result<std::shared_ptr<Table>> Read() = 0;
+
+ /// Deprecated variant of Read() which stores the table in *out and
+ /// reports failure through the returned Status
+ ARROW_DEPRECATED("Use Result-returning version")
+ Status Read(std::shared_ptr<Table>* out);
+
+ /// Create a TableReader instance
+ ///
+ /// \param[in] pool MemoryPool handed to the reader
+ /// \param[in] input stream from which the JSON data is read
+ static Result<std::shared_ptr<TableReader>> Make(MemoryPool* pool,
+ std::shared_ptr<io::InputStream> input,
+ const ReadOptions&,
+ const ParseOptions&);
+
+ /// Deprecated variant of Make() which stores the reader in *out and
+ /// reports failure through the returned Status
+ ARROW_DEPRECATED("Use Result-returning version")
+ static Status Make(MemoryPool* pool, std::shared_ptr<io::InputStream> input,
+ const ReadOptions&, const ParseOptions&,
+ std::shared_ptr<TableReader>* out);
+ };
+
+ /// Parse the JSON held in a single buffer with the given options and
+ /// return the result as a RecordBatch
+ ARROW_EXPORT Result<std::shared_ptr<RecordBatch>> ParseOne(ParseOptions options,
+ std::shared_ptr<Buffer> json);
+
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/json/type_fwd.h b/contrib/libs/apache/arrow/cpp/src/arrow/json/type_fwd.h
new file mode 100644
index 0000000000..67e2e1bb40
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/json/type_fwd.h
@@ -0,0 +1,26 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+namespace arrow {
+namespace json {
+
+class TableReader;
+struct ReadOptions;
+struct ParseOptions;
+
+} // namespace json
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/cpp/src/arrow/util/bitset_stack.h b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitset_stack.h
new file mode 100644
index 0000000000..addded9494
--- /dev/null
+++ b/contrib/libs/apache/arrow/cpp/src/arrow/util/bitset_stack.h
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <bitset>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "arrow/buffer.h"
+#include "arrow/memory_pool.h"
+#include "arrow/result.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/compare.h"
+#include "arrow/util/functional.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/string_builder.h"
+#include "arrow/util/string_view.h"
+#include "arrow/util/type_traits.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace internal {
+
+ /// \brief A stack of bitsets kept in one contiguous bit vector. Bits of the
+ /// top bitset can be read and written, but the top bitset cannot be resized.
+ class BitsetStack {
+  public:
+   using reference = typename std::vector<bool>::reference;
+
+   /// \brief push a bitset onto the stack
+   /// \param size number of bits in the next bitset
+   /// \param value initial value for bits in the pushed bitset
+   void Push(int size, bool value) {
+     const int start = bit_count();
+     offsets_.push_back(start);
+     bits_.resize(start + size, value);
+   }
+
+   /// \brief number of bits in the bitset at the top of the stack
+   /// (zero when the stack is empty)
+   int TopSize() const {
+     return offsets_.empty() ? 0 : bit_count() - offsets_.back();
+   }
+
+   /// \brief pop a bitset off the stack
+   void Pop() {
+     const int start = offsets_.back();
+     offsets_.pop_back();
+     bits_.resize(start);
+   }
+
+   /// \brief get the value of a bit in the top bitset
+   /// \param i index of the bit to access
+   bool operator[](int i) const { return bits_[top_offset() + i]; }
+
+   /// \brief get a mutable reference to a bit in the top bitset
+   /// \param i index of the bit to access
+   reference operator[](int i) { return bits_[top_offset() + i]; }
+
+  private:
+   // total number of bits stored across all bitsets
+   int bit_count() const { return static_cast<int>(bits_.size()); }
+   // position in bits_ of the first bit of the top bitset
+   int top_offset() const { return offsets_.back(); }
+
+   std::vector<bool> bits_;
+   std::vector<int> offsets_;
+ };
+
+} // namespace internal
+} // namespace arrow
diff --git a/contrib/libs/apache/arrow/src/arrow/util/config.h b/contrib/libs/apache/arrow/src/arrow/util/config.h
index 2d46017e47..4e002e3d29 100644
--- a/contrib/libs/apache/arrow/src/arrow/util/config.h
+++ b/contrib/libs/apache/arrow/src/arrow/util/config.h
@@ -36,11 +36,11 @@
#define ARROW_COMPUTE
#define ARROW_CSV
-/* #undef ARROW_DATASET */
-/* #undef ARROW_FILESYSTEM */
+#define ARROW_DATASET
+#define ARROW_FILESYSTEM
/* #undef ARROW_FLIGHT */
#define ARROW_IPC
-/* #undef ARROW_JSON */
+#define ARROW_JSON
/* #undef ARROW_S3 */
#ifdef __GNUC__
diff --git a/contrib/libs/rapidjson/include/rapidjson/document.h b/contrib/libs/rapidjson/include/rapidjson/document.h
new file mode 100644
index 0000000000..a2b044c8da
--- /dev/null
+++ b/contrib/libs/rapidjson/include/rapidjson/document.h
@@ -0,0 +1,2602 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_DOCUMENT_H_
+#define RAPIDJSON_DOCUMENT_H_
+
+/*! \file document.h */
+
+#include "reader.h"
+#include "internal/meta.h"
+#include "internal/strfunc.h"
+#include "memorystream.h"
+#include "encodedstream.h"
+#include <new> // placement new
+#include <limits>
+
+RAPIDJSON_DIAG_PUSH
+#ifdef _MSC_VER
+RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
+RAPIDJSON_DIAG_OFF(4244) // conversion from kXxxFlags to 'uint16_t', possible loss of data
+#ifdef _MINWINDEF_ // see: http://stackoverflow.com/questions/22744262/cant-call-stdmax-because-minwindef-h-defines-max
+#ifndef NOMINMAX
+#pragma push_macro("min")
+#pragma push_macro("max")
+#undef min
+#undef max
+#endif
+#endif
+#endif
+
+#ifdef __clang__
+RAPIDJSON_DIAG_OFF(padded)
+RAPIDJSON_DIAG_OFF(switch-enum)
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#endif
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_OFF(effc++)
+#if __GNUC__ >= 6
+RAPIDJSON_DIAG_OFF(terminate) // ignore throwing RAPIDJSON_ASSERT in RAPIDJSON_NOEXCEPT functions
+#endif
+#endif // __GNUC__
+
+#ifndef RAPIDJSON_NOMEMBERITERATORCLASS
+#include <iterator> // std::iterator, std::random_access_iterator_tag
+#endif
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+#include <utility> // std::move
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+// Forward declaration.
+template <typename Encoding, typename Allocator>
+class GenericValue;
+
+template <typename Encoding, typename Allocator, typename StackAllocator>
+class GenericDocument;
+
+//! Name-value pair in a JSON object value.
+/*!
+ This struct used to be an inner struct of GenericValue.
+ A compiler (IBM XL C/C++ for AIX) was reported to have problems with that, so it was moved to namespace scope.
+ https://code.google.com/p/rapidjson/issues/detail?id=64
+*/
+template <typename Encoding, typename Allocator>
+struct GenericMember {
+ GenericValue<Encoding, Allocator> name; //!< name of member (must be a string)
+ GenericValue<Encoding, Allocator> value; //!< value of member.
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericMemberIterator
+
+#ifndef RAPIDJSON_NOMEMBERITERATORCLASS
+
+//! (Constant) member iterator for a JSON object value
+/*!
+ \tparam Const Is this a constant iterator?
+ \tparam Encoding Encoding of the value. (Even non-string values need to have the same encoding in a document)
+ \tparam Allocator Allocator type for allocating memory of object, array and string.
+
+ This class implements a Random Access Iterator for GenericMember elements
+ of a GenericValue, see ISO/IEC 14882:2003(E) C++ standard, 24.1 [lib.iterator.requirements].
+
+ \note This iterator implementation is mainly intended to avoid implicit
+ conversions from iterator values to \c NULL,
+ e.g. from GenericValue::FindMember.
+
+ \note Define \c RAPIDJSON_NOMEMBERITERATORCLASS to fall back to a
+ pointer-based implementation, if your platform doesn't provide
+ the C++ <iterator> header.
+
+ \see GenericMember, GenericValue::MemberIterator, GenericValue::ConstMemberIterator
+ */
+template <bool Const, typename Encoding, typename Allocator>
+class GenericMemberIterator
+ : public std::iterator<std::random_access_iterator_tag
+ , typename internal::MaybeAddConst<Const,GenericMember<Encoding,Allocator> >::Type> { // NOTE(review): std::iterator is deprecated since C++17
+
+ friend class GenericValue<Encoding,Allocator>; // gives GenericValue access to the private pointer constructor
+ template <bool, typename, typename> friend class GenericMemberIterator; // lets const and non-const instantiations read each other's ptr_
+
+ typedef GenericMember<Encoding,Allocator> PlainType;
+ typedef typename internal::MaybeAddConst<Const,PlainType>::Type ValueType;
+ typedef std::iterator<std::random_access_iterator_tag,ValueType> BaseType;
+
+public:
+ //! Iterator type itself
+ typedef GenericMemberIterator Iterator;
+ //! Constant iterator type
+ typedef GenericMemberIterator<true,Encoding,Allocator> ConstIterator;
+ //! Non-constant iterator type
+ typedef GenericMemberIterator<false,Encoding,Allocator> NonConstIterator;
+
+ //! Pointer to (const) GenericMember
+ typedef typename BaseType::pointer Pointer;
+ //! Reference to (const) GenericMember
+ typedef typename BaseType::reference Reference;
+ //! Signed integer type (e.g. \c ptrdiff_t)
+ typedef typename BaseType::difference_type DifferenceType;
+
+ //! Default constructor (singular value)
+ /*! Creates an iterator pointing to no element.
+ \note All operations, except for comparisons, are undefined on such values.
+ */
+ GenericMemberIterator() : ptr_() {}
+
+ //! Iterator conversions to more const
+ /*!
+ \param it (Non-const) iterator to copy from
+
+ Allows the creation of an iterator from another GenericMemberIterator
+ that is "less const". In particular, creating a non-constant iterator
+ from a constant iterator is disabled:
+ \li const -> non-const (not ok)
+ \li const -> const (ok)
+ \li non-const -> const (ok)
+ \li non-const -> non-const (ok)
+
+ \note If the \c Const template parameter is already \c false, this
+ constructor effectively defines a regular copy-constructor.
+ Otherwise, the copy constructor is implicitly defined.
+ */
+ GenericMemberIterator(const NonConstIterator & it) : ptr_(it.ptr_) {}
+ Iterator& operator=(const NonConstIterator & it) { ptr_ = it.ptr_; return *this; }
+
+ //! @name stepping
+ //@{
+ Iterator& operator++(){ ++ptr_; return *this; }
+ Iterator& operator--(){ --ptr_; return *this; }
+ Iterator operator++(int){ Iterator old(*this); ++ptr_; return old; } // post-increment: returns the position before the step
+ Iterator operator--(int){ Iterator old(*this); --ptr_; return old; } // post-decrement: returns the position before the step
+ //@}
+
+ //! @name increment/decrement
+ //@{
+ Iterator operator+(DifferenceType n) const { return Iterator(ptr_+n); }
+ Iterator operator-(DifferenceType n) const { return Iterator(ptr_-n); }
+
+ Iterator& operator+=(DifferenceType n) { ptr_+=n; return *this; }
+ Iterator& operator-=(DifferenceType n) { ptr_-=n; return *this; }
+ //@}
+
+ //! @name relations
+ //@{
+ bool operator==(ConstIterator that) const { return ptr_ == that.ptr_; } // all relations compare the raw member pointers
+ bool operator!=(ConstIterator that) const { return ptr_ != that.ptr_; }
+ bool operator<=(ConstIterator that) const { return ptr_ <= that.ptr_; }
+ bool operator>=(ConstIterator that) const { return ptr_ >= that.ptr_; }
+ bool operator< (ConstIterator that) const { return ptr_ < that.ptr_; }
+ bool operator> (ConstIterator that) const { return ptr_ > that.ptr_; }
+ //@}
+
+ //! @name dereference
+ //@{
+ Reference operator*() const { return *ptr_; }
+ Pointer operator->() const { return ptr_; }
+ Reference operator[](DifferenceType n) const { return ptr_[n]; }
+ //@}
+
+ //! Distance (number of members between \c that and this iterator, as a pointer difference)
+ DifferenceType operator-(ConstIterator that) const { return ptr_-that.ptr_; }
+
+private:
+ //! Internal constructor from plain pointer
+ explicit GenericMemberIterator(Pointer p) : ptr_(p) {}
+
+ Pointer ptr_; //!< raw pointer
+};
+
+#else // RAPIDJSON_NOMEMBERITERATORCLASS
+
+// class-based member iterator implementation disabled, use plain pointers
+
+template <bool Const, typename Encoding, typename Allocator>
+struct GenericMemberIterator;
+
+//! Non-const GenericMemberIterator for the RAPIDJSON_NOMEMBERITERATORCLASS branch: only names a pointer type
+template <typename Encoding, typename Allocator>
+struct GenericMemberIterator<false,Encoding,Allocator> {
+ //! use a plain (mutable) pointer to GenericMember as the iterator type
+ typedef GenericMember<Encoding,Allocator>* Iterator;
+};
+//! Const GenericMemberIterator for the RAPIDJSON_NOMEMBERITERATORCLASS branch: only names a pointer type
+template <typename Encoding, typename Allocator>
+struct GenericMemberIterator<true,Encoding,Allocator> {
+ //! use a plain pointer to const GenericMember as the iterator type
+ typedef const GenericMember<Encoding,Allocator>* Iterator;
+};
+
+#endif // RAPIDJSON_NOMEMBERITERATORCLASS
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericStringRef
+
+//! Reference to a constant string (not taking a copy)
+/*!
+ \tparam CharType character type of the string
+
+ This helper class is used to automatically infer constant string
+ references for string literals, especially from \c const \b (!)
+ character arrays.
+
+ The main use is for creating JSON string values without copying the
+ source string via an \ref Allocator. This requires that the referenced
+ string pointers have a sufficient lifetime, which exceeds the lifetime
+ of the associated GenericValue.
+
+ \b Example
+ \code
+ Value v("foo"); // ok, no need to copy & calculate length
+ const char foo[] = "foo";
+ v.SetString(foo); // ok
+
+ const char* bar = foo;
+ // Value x(bar); // not ok, can't rely on bar's lifetime
+ Value x(StringRef(bar)); // lifetime explicitly guaranteed by user
+ Value y(StringRef(bar, 3)); // ok, explicitly pass length
+ \endcode
+
+ \see StringRef, GenericValue::SetString
+*/
+template<typename CharType>
+struct GenericStringRef {
+ typedef CharType Ch; //!< character type of the string
+
+ //! Create string reference from \c const character array
+#ifndef __clang__ // -Wdocumentation
+ /*!
+ This constructor implicitly creates a constant string reference from
+ a \c const character array. It has better performance than
+ \ref StringRef(const CharType*) by inferring the string \ref length
+ from the array length, and also supports strings containing null
+ characters.
+
+ \tparam N length of the string, automatically inferred
+
+ \param str Constant character array, lifetime assumed to be longer
+ than the use of the string in e.g. a GenericValue
+
+ \post \ref s == str
+
+ \note Constant complexity.
+ \note There is a hidden, private overload to disallow references to
+ non-const character arrays to be created via this constructor.
+ By this, e.g. function-scope arrays used to be filled via
+ \c snprintf are excluded from consideration.
+ In such cases, the referenced string should be \b copied to the
+ GenericValue instead.
+ */
+#endif
+ template<SizeType N>
+ GenericStringRef(const CharType (&str)[N]) RAPIDJSON_NOEXCEPT
+ : s(str), length(N-1) {} // the last array element is not counted (the terminator, for a string literal)
+
+ //! Explicitly create string reference from \c const character pointer
+#ifndef __clang__ // -Wdocumentation
+ /*!
+ This constructor can be used to \b explicitly create a reference to
+ a constant string pointer.
+
+ \see StringRef(const CharType*)
+
+ \param str Constant character pointer, lifetime assumed to be longer
+ than the use of the string in e.g. a GenericValue
+
+ \post \ref s == str
+
+ \note There is a hidden, private overload to disallow references to
+ non-const character arrays to be created via this constructor.
+ By this, e.g. function-scope arrays used to be filled via
+ \c snprintf are excluded from consideration.
+ In such cases, the referenced string should be \b copied to the
+ GenericValue instead.
+ */
+#endif
+ explicit GenericStringRef(const CharType* str)
+ : s(str), length(internal::StrLen(str)){ RAPIDJSON_ASSERT(s != 0); } // NOTE(review): StrLen(str) is evaluated in the initializer list, before this assert can reject a null str
+
+ //! Create constant string reference from pointer and length
+#ifndef __clang__ // -Wdocumentation
+ /*! \param str constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
+ \param len length of the string, excluding the trailing NULL terminator
+
+ \post \ref s == str && \ref length == len
+ \note Constant complexity.
+ */
+#endif
+ GenericStringRef(const CharType* str, SizeType len)
+ : s(str), length(len) { RAPIDJSON_ASSERT(s != 0); }
+
+ GenericStringRef(const GenericStringRef& rhs) : s(rhs.s), length(rhs.length) {} // copies pointer and length only; the characters themselves are shared
+
+ //! implicit conversion to plain CharType pointer
+ operator const Ch *() const { return s; }
+
+ const Ch* const s; //!< plain CharType pointer
+ const SizeType length; //!< length of the string (excluding the trailing NULL terminator)
+
+private:
+ //! Disallow construction from non-const array (declared private, no definition given here)
+ template<SizeType N>
+ GenericStringRef(CharType (&str)[N]) /* = delete */;
+ //! Copy assignment operator not permitted - immutable type (both data members are const)
+ GenericStringRef& operator=(const GenericStringRef& rhs) /* = delete */;
+};
+
+//! Mark a character pointer as constant string
+/*! Mark a plain character pointer as a "string literal". This function
+ can be used to avoid copying a character string to be referenced as a
+ value in a JSON GenericValue object, if the string's lifetime is known
+ to be valid long enough.
+ \tparam CharType Character type of the string
+ \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
+ \return GenericStringRef string reference object
+ \relatesalso GenericStringRef
+
+ \see GenericValue::GenericValue(StringRefType), GenericValue::operator=(StringRefType), GenericValue::SetString(StringRefType), GenericValue::PushBack(StringRefType, Allocator&), GenericValue::AddMember
+*/
+template<typename CharType>
+inline GenericStringRef<CharType> StringRef(const CharType* str) {
+ return GenericStringRef<CharType>(str, internal::StrLen(str)); // length comes from internal::StrLen(str); presumably str must be non-null and terminated - confirm against StrLen
+}
+
+//! Mark a character pointer as constant string
+/*! Mark a plain character pointer as a "string literal". This function
+ can be used to avoid copying a character string to be referenced as a
+ value in a JSON GenericValue object, if the string's lifetime is known
+ to be valid long enough.
+
+ This version has better performance with supplied length, and also
+ supports string containing null characters.
+
+ \tparam CharType character type of the string
+ \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
+ \param length The length of source string.
+ \return GenericStringRef string reference object
+ \relatesalso GenericStringRef
+*/
+template<typename CharType>
+inline GenericStringRef<CharType> StringRef(const CharType* str, size_t length) {
+ return GenericStringRef<CharType>(str, SizeType(length)); // the size_t length is converted to SizeType by this cast
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+//! Mark a string object as constant string
+/*! Mark a string object (e.g. \c std::string) as a "string literal".
+ This function can be used to avoid copying a string to be referenced as a
+ value in a JSON GenericValue object, if the string's lifetime is known
+ to be valid long enough.
+
+ \tparam CharType character type of the string
+ \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
+ \return GenericStringRef string reference object
+ \relatesalso GenericStringRef
+ \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
+*/
+template<typename CharType>
+inline GenericStringRef<CharType> StringRef(const std::basic_string<CharType>& str) {
+ return GenericStringRef<CharType>(str.data(), SizeType(str.size())); // refers to str's own buffer; nothing is copied here
+}
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericValue type traits
+namespace internal {
+
+template <typename T, typename Encoding = void, typename Allocator = void>
+struct IsGenericValueImpl : FalseType {}; // fallback result for any T the specialization does not match
+
+// Candidates exposing nested EncodingType and AllocatorType: the result is whether GenericValue<EncodingType, AllocatorType> is a base of T
+template <typename T> struct IsGenericValueImpl<T, typename Void<typename T::EncodingType>::Type, typename Void<typename T::AllocatorType>::Type>
+ : IsBaseOf<GenericValue<typename T::EncodingType, typename T::AllocatorType>, T>::Type {};
+
+// Helper to match arbitrary GenericValue instantiations, including derived classes; inherits its result from IsGenericValueImpl<T>
+template <typename T> struct IsGenericValue : IsGenericValueImpl<T>::Type {};
+
+} // namespace internal
+
+///////////////////////////////////////////////////////////////////////////////
+// TypeHelper
+
+namespace internal {
+
+template <typename ValueType, typename T>
+struct TypeHelper {}; // primary template is empty; Is/Get/Set exist only in the per-type specializations
+
+template<typename ValueType>
+struct TypeHelper<ValueType, bool> { // bool: forwards to IsBool / GetBool / SetBool
+ static bool Is(const ValueType& v) { return v.IsBool(); }
+ static bool Get(const ValueType& v) { return v.GetBool(); }
+ static ValueType& Set(ValueType& v, bool data) { return v.SetBool(data); }
+ static ValueType& Set(ValueType& v, bool data, typename ValueType::AllocatorType&) { return v.SetBool(data); } // allocator argument is ignored
+};
+
+template<typename ValueType>
+struct TypeHelper<ValueType, int> { // int: forwards to IsInt / GetInt / SetInt
+ static bool Is(const ValueType& v) { return v.IsInt(); }
+ static int Get(const ValueType& v) { return v.GetInt(); }
+ static ValueType& Set(ValueType& v, int data) { return v.SetInt(data); }
+ static ValueType& Set(ValueType& v, int data, typename ValueType::AllocatorType&) { return v.SetInt(data); } // allocator argument is ignored
+};
+
+template<typename ValueType>
+struct TypeHelper<ValueType, unsigned> { // unsigned: forwards to IsUint / GetUint / SetUint
+ static bool Is(const ValueType& v) { return v.IsUint(); }
+ static unsigned Get(const ValueType& v) { return v.GetUint(); }
+ static ValueType& Set(ValueType& v, unsigned data) { return v.SetUint(data); }
+ static ValueType& Set(ValueType& v, unsigned data, typename ValueType::AllocatorType&) { return v.SetUint(data); } // allocator argument is ignored
+};
+
+template<typename ValueType>
+struct TypeHelper<ValueType, int64_t> { // int64_t: forwards to IsInt64 / GetInt64 / SetInt64
+ static bool Is(const ValueType& v) { return v.IsInt64(); }
+ static int64_t Get(const ValueType& v) { return v.GetInt64(); }
+ static ValueType& Set(ValueType& v, int64_t data) { return v.SetInt64(data); }
+ static ValueType& Set(ValueType& v, int64_t data, typename ValueType::AllocatorType&) { return v.SetInt64(data); } // allocator argument is ignored
+};
+
+template<typename ValueType>
+struct TypeHelper<ValueType, uint64_t> { // uint64_t: forwards to IsUint64 / GetUint64 / SetUint64
+ static bool Is(const ValueType& v) { return v.IsUint64(); }
+ static uint64_t Get(const ValueType& v) { return v.GetUint64(); }
+ static ValueType& Set(ValueType& v, uint64_t data) { return v.SetUint64(data); }
+ static ValueType& Set(ValueType& v, uint64_t data, typename ValueType::AllocatorType&) { return v.SetUint64(data); } // allocator argument is ignored
+};
+
+template<typename ValueType>
+struct TypeHelper<ValueType, double> { // double: forwards to IsDouble / GetDouble / SetDouble
+ static bool Is(const ValueType& v) { return v.IsDouble(); }
+ static double Get(const ValueType& v) { return v.GetDouble(); }
+ static ValueType& Set(ValueType& v, double data) { return v.SetDouble(data); }
+ static ValueType& Set(ValueType& v, double data, typename ValueType::AllocatorType&) { return v.SetDouble(data); } // allocator argument is ignored
+};
+
+template<typename ValueType>
+struct TypeHelper<ValueType, float> { // float: forwards to IsFloat / GetFloat / SetFloat
+ static bool Is(const ValueType& v) { return v.IsFloat(); }
+ static float Get(const ValueType& v) { return v.GetFloat(); }
+ static ValueType& Set(ValueType& v, float data) { return v.SetFloat(data); }
+ static ValueType& Set(ValueType& v, float data, typename ValueType::AllocatorType&) { return v.SetFloat(data); } // allocator argument is ignored
+};
+
+template<typename ValueType>
+struct TypeHelper<ValueType, const typename ValueType::Ch*> { // C string pointer: Is/Get forward to IsString / GetString
+ typedef const typename ValueType::Ch* StringType;
+ static bool Is(const ValueType& v) { return v.IsString(); }
+ static StringType Get(const ValueType& v) { return v.GetString(); }
+ static ValueType& Set(ValueType& v, const StringType data) { return v.SetString(typename ValueType::StringRefType(data)); } // wraps data in a StringRefType; no allocator is involved
+ static ValueType& Set(ValueType& v, const StringType data, typename ValueType::AllocatorType& a) { return v.SetString(data, a); } // hands the allocator on to SetString
+};
+
+#if RAPIDJSON_HAS_STDSTRING
+template<typename ValueType>
+struct TypeHelper<ValueType, std::basic_string<typename ValueType::Ch> > { // std::basic_string: only an allocator-taking Set is provided
+ typedef std::basic_string<typename ValueType::Ch> StringType;
+ static bool Is(const ValueType& v) { return v.IsString(); }
+ static StringType Get(const ValueType& v) { return StringType(v.GetString(), v.GetStringLength()); } // built from pointer + explicit length, returned by value
+ static ValueType& Set(ValueType& v, const StringType& data, typename ValueType::AllocatorType& a) { return v.SetString(data, a); }
+};
+#endif
+
+template<typename ValueType>
+struct TypeHelper<ValueType, typename ValueType::Array> { // mutable array view: Get needs a non-const ValueType&
+ typedef typename ValueType::Array ArrayType;
+ static bool Is(const ValueType& v) { return v.IsArray(); }
+ static ArrayType Get(ValueType& v) { return v.GetArray(); }
+ static ValueType& Set(ValueType& v, ArrayType data) { return v = data; } // assigns the array to v and returns v
+ static ValueType& Set(ValueType& v, ArrayType data, typename ValueType::AllocatorType&) { return v = data; } // allocator argument is ignored
+};
+
+template<typename ValueType>
+struct TypeHelper<ValueType, typename ValueType::ConstArray> { // read-only array view: Is and Get only, no Set
+ typedef typename ValueType::ConstArray ArrayType;
+ static bool Is(const ValueType& v) { return v.IsArray(); }
+ static ArrayType Get(const ValueType& v) { return v.GetArray(); }
+};
+
+template<typename ValueType>
+struct TypeHelper<ValueType, typename ValueType::Object> { // mutable object view: Get needs a non-const ValueType&
+ typedef typename ValueType::Object ObjectType;
+ static bool Is(const ValueType& v) { return v.IsObject(); }
+ static ObjectType Get(ValueType& v) { return v.GetObject(); }
+ static ValueType& Set(ValueType& v, ObjectType data) { return v = data; } // assigns the object to v and returns v
+ static ValueType& Set(ValueType& v, ObjectType data, typename ValueType::AllocatorType&) { return v = data; } // allocator argument is ignored; the result must be returned, falling off the end of a ValueType& function is undefined behaviour
+};
+
+template<typename ValueType>
+struct TypeHelper<ValueType, typename ValueType::ConstObject> { // read-only object view: Is and Get only, no Set
+ typedef typename ValueType::ConstObject ObjectType;
+ static bool Is(const ValueType& v) { return v.IsObject(); }
+ static ObjectType Get(const ValueType& v) { return v.GetObject(); }
+};
+
+} // namespace internal
+
+// Forward declarations
+template <bool, typename> class GenericArray;
+template <bool, typename> class GenericObject;
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericValue
+
+//! Represents a JSON value. Use Value for UTF8 encoding and default allocator.
+/*!
+ A JSON value can be one of 7 types. This class is a variant type supporting
+ these types.
+
+ Use the Value if UTF8 and default allocator
+
+ \tparam Encoding Encoding of the value. (Even non-string values need to have the same encoding in a document)
+ \tparam Allocator Allocator type for allocating memory of object, array and string.
+*/
+template <typename Encoding, typename Allocator = MemoryPoolAllocator<> >
+class GenericValue {
+public:
+ //! Name-value pair in an object.
+ typedef GenericMember<Encoding, Allocator> Member;
+ typedef Encoding EncodingType; //!< Encoding type from template parameter.
+ typedef Allocator AllocatorType; //!< Allocator type from template parameter.
+ typedef typename Encoding::Ch Ch; //!< Character type derived from Encoding.
+ typedef GenericStringRef<Ch> StringRefType; //!< Reference to a constant string
+ typedef typename GenericMemberIterator<false,Encoding,Allocator>::Iterator MemberIterator; //!< Member iterator for iterating in object.
+ typedef typename GenericMemberIterator<true,Encoding,Allocator>::Iterator ConstMemberIterator; //!< Constant member iterator for iterating in object.
+ typedef GenericValue* ValueIterator; //!< Value iterator for iterating in array.
+ typedef const GenericValue* ConstValueIterator; //!< Constant value iterator for iterating in array.
+ typedef GenericValue<Encoding, Allocator> ValueType; //!< Value type of itself.
+ typedef GenericArray<false, ValueType> Array;
+ typedef GenericArray<true, ValueType> ConstArray;
+ typedef GenericObject<false, ValueType> Object;
+ typedef GenericObject<true, ValueType> ConstObject;
+
+ //!@name Constructors and destructor.
+ //@{
+
+ //! Default constructor creates a null value.
+ GenericValue() RAPIDJSON_NOEXCEPT : data_() { data_.f.flags = kNullFlag; }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+ //! Move constructor in C++11
+ GenericValue(GenericValue&& rhs) RAPIDJSON_NOEXCEPT : data_(rhs.data_) {
+ rhs.data_.f.flags = kNullFlag; // give up contents
+ }
+#endif
+
+private:
+ //! Copy constructor is not permitted.
+ GenericValue(const GenericValue& rhs);
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+ //! Moving from a GenericDocument is not permitted.
+ template <typename StackAllocator>
+ GenericValue(GenericDocument<Encoding,Allocator,StackAllocator>&& rhs);
+
+ //! Move assignment from a GenericDocument is not permitted.
+ template <typename StackAllocator>
+ GenericValue& operator=(GenericDocument<Encoding,Allocator,StackAllocator>&& rhs);
+#endif
+
+public:
+
+ //! Constructor with JSON value type.
+ /*! This creates a Value of specified type with default content.
+ \param type Type of the value.
+ \note Default content for number is zero.
+ */
+ explicit GenericValue(Type type) RAPIDJSON_NOEXCEPT : data_() {
+ static const uint16_t defaultFlags[7] = {
+ kNullFlag, kFalseFlag, kTrueFlag, kObjectFlag, kArrayFlag, kShortStringFlag,
+ kNumberAnyFlag
+ };
+ RAPIDJSON_ASSERT(type <= kNumberType);
+ data_.f.flags = defaultFlags[type];
+
+ // Use ShortString to store empty string.
+ if (type == kStringType)
+ data_.ss.SetLength(0);
+ }
+
+ //! Explicit copy constructor (with allocator)
+ /*! Creates a copy of a Value by using the given Allocator
+ \tparam SourceAllocator allocator of \c rhs
+ \param rhs Value to copy from (read-only)
+ \param allocator Allocator for allocating copied elements and buffers. Commonly use GenericDocument::GetAllocator().
+ \see CopyFrom()
+ */
+ template <typename SourceAllocator>
+ GenericValue(const GenericValue<Encoding,SourceAllocator>& rhs, Allocator& allocator) {
+ switch (rhs.GetType()) {
+ case kObjectType: {
+ SizeType count = rhs.data_.o.size;
+ Member* lm = reinterpret_cast<Member*>(allocator.Malloc(count * sizeof(Member)));
+ const typename GenericValue<Encoding,SourceAllocator>::Member* rm = rhs.GetMembersPointer();
+ for (SizeType i = 0; i < count; i++) {
+ new (&lm[i].name) GenericValue(rm[i].name, allocator);
+ new (&lm[i].value) GenericValue(rm[i].value, allocator);
+ }
+ data_.f.flags = kObjectFlag;
+ data_.o.size = data_.o.capacity = count;
+ SetMembersPointer(lm);
+ }
+ break;
+ case kArrayType: {
+ SizeType count = rhs.data_.a.size;
+ GenericValue* le = reinterpret_cast<GenericValue*>(allocator.Malloc(count * sizeof(GenericValue)));
+ const GenericValue<Encoding,SourceAllocator>* re = rhs.GetElementsPointer();
+ for (SizeType i = 0; i < count; i++)
+ new (&le[i]) GenericValue(re[i], allocator);
+ data_.f.flags = kArrayFlag;
+ data_.a.size = data_.a.capacity = count;
+ SetElementsPointer(le);
+ }
+ break;
+ case kStringType:
+ if (rhs.data_.f.flags == kConstStringFlag) {
+ data_.f.flags = rhs.data_.f.flags;
+ data_ = *reinterpret_cast<const Data*>(&rhs.data_);
+ }
+ else
+ SetStringRaw(StringRef(rhs.GetString(), rhs.GetStringLength()), allocator);
+ break;
+ default:
+ data_.f.flags = rhs.data_.f.flags;
+ data_ = *reinterpret_cast<const Data*>(&rhs.data_);
+ break;
+ }
+ }
+
+ //! Constructor for boolean value.
+ /*! \param b Boolean value
+ \note This constructor is limited to \em real boolean values and rejects
+ implicitly converted types like arbitrary pointers. Use an explicit cast
+ to \c bool, if you want to construct a boolean JSON value in such cases.
+ */
+#ifndef RAPIDJSON_DOXYGEN_RUNNING // hide SFINAE from Doxygen
+ template <typename T>
+ explicit GenericValue(T b, RAPIDJSON_ENABLEIF((internal::IsSame<bool, T>))) RAPIDJSON_NOEXCEPT // See #472
+#else
+ explicit GenericValue(bool b) RAPIDJSON_NOEXCEPT
+#endif
+ : data_() {
+ // safe-guard against failing SFINAE
+ RAPIDJSON_STATIC_ASSERT((internal::IsSame<bool,T>::Value));
+ data_.f.flags = b ? kTrueFlag : kFalseFlag;
+ }
+
+ //! Constructor for int value.
+ explicit GenericValue(int i) RAPIDJSON_NOEXCEPT : data_() {
+ data_.n.i64 = i;
+ data_.f.flags = (i >= 0) ? (kNumberIntFlag | kUintFlag | kUint64Flag) : kNumberIntFlag;
+ }
+
+ //! Constructor for unsigned value.
+ explicit GenericValue(unsigned u) RAPIDJSON_NOEXCEPT : data_() {
+ data_.n.u64 = u;
+ data_.f.flags = (u & 0x80000000) ? kNumberUintFlag : (kNumberUintFlag | kIntFlag | kInt64Flag);
+ }
+
+ //! Constructor for int64_t value.
+ explicit GenericValue(int64_t i64) RAPIDJSON_NOEXCEPT : data_() {
+ data_.n.i64 = i64;
+ data_.f.flags = kNumberInt64Flag;
+ if (i64 >= 0) {
+ data_.f.flags |= kNumberUint64Flag;
+ if (!(static_cast<uint64_t>(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000)))
+ data_.f.flags |= kUintFlag;
+ if (!(static_cast<uint64_t>(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000)))
+ data_.f.flags |= kIntFlag;
+ }
+ else if (i64 >= static_cast<int64_t>(RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000)))
+ data_.f.flags |= kIntFlag;
+ }
+
+ //! Constructor for uint64_t value.
+ explicit GenericValue(uint64_t u64) RAPIDJSON_NOEXCEPT : data_() {
+ data_.n.u64 = u64;
+ data_.f.flags = kNumberUint64Flag;
+ if (!(u64 & RAPIDJSON_UINT64_C2(0x80000000, 0x00000000)))
+ data_.f.flags |= kInt64Flag;
+ if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000)))
+ data_.f.flags |= kUintFlag;
+ if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000)))
+ data_.f.flags |= kIntFlag;
+ }
+
+ //! Constructor for double value.
+ explicit GenericValue(double d) RAPIDJSON_NOEXCEPT : data_() { data_.n.d = d; data_.f.flags = kNumberDoubleFlag; }
+
+ //! Constructor for float value.
+ explicit GenericValue(float f) RAPIDJSON_NOEXCEPT : data_() { data_.n.d = static_cast<double>(f); data_.f.flags = kNumberDoubleFlag; }
+
+ //! Constructor for constant string (i.e. do not make a copy of string)
+ GenericValue(const Ch* s, SizeType length) RAPIDJSON_NOEXCEPT : data_() { SetStringRaw(StringRef(s, length)); }
+
+ //! Constructor for constant string (i.e. do not make a copy of string)
+ explicit GenericValue(StringRefType s) RAPIDJSON_NOEXCEPT : data_() { SetStringRaw(s); }
+
+ //! Constructor for copy-string (i.e. do make a copy of string)
+ GenericValue(const Ch* s, SizeType length, Allocator& allocator) : data_() { SetStringRaw(StringRef(s, length), allocator); }
+
+ //! Constructor for copy-string (i.e. do make a copy of string)
+ GenericValue(const Ch*s, Allocator& allocator) : data_() { SetStringRaw(StringRef(s), allocator); }
+
+#if RAPIDJSON_HAS_STDSTRING
+ //! Constructor for copy-string from a string object (i.e. do make a copy of string)
+ /*! \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
+ */
+ GenericValue(const std::basic_string<Ch>& s, Allocator& allocator) : data_() { SetStringRaw(StringRef(s), allocator); }
+#endif
+
+ //! Constructor for Array.
+ /*!
+ \param a An array obtained by \c GetArray().
+ \note \c Array is always pass-by-value.
+ \note the source array is moved into this value and the source array becomes empty.
+ */
+ GenericValue(Array a) RAPIDJSON_NOEXCEPT : data_(a.value_.data_) {
+ a.value_.data_ = Data();
+ a.value_.data_.f.flags = kArrayFlag;
+ }
+
+ //! Constructor for Object.
+ /*!
+ \param o An object obtained by \c GetObject().
+ \note \c Object is always pass-by-value.
+ \note the source object is moved into this value and the source object becomes empty.
+ */
+ GenericValue(Object o) RAPIDJSON_NOEXCEPT : data_(o.value_.data_) {
+ o.value_.data_ = Data();
+ o.value_.data_.f.flags = kObjectFlag;
+ }
+
+ //! Destructor.
+ /*! Need to destruct elements of array, members of object, or copy-string.
+ */
+ ~GenericValue() {
+ if (Allocator::kNeedFree) { // Shortcut by Allocator's trait
+ switch(data_.f.flags) {
+ case kArrayFlag:
+ {
+ GenericValue* e = GetElementsPointer();
+ for (GenericValue* v = e; v != e + data_.a.size; ++v)
+ v->~GenericValue();
+ Allocator::Free(e);
+ }
+ break;
+
+ case kObjectFlag:
+ for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m)
+ m->~Member();
+ Allocator::Free(GetMembersPointer());
+ break;
+
+ case kCopyStringFlag:
+ Allocator::Free(const_cast<Ch*>(GetStringPointer()));
+ break;
+
+ default:
+ break; // Do nothing for other types.
+ }
+ }
+ }
+
+ //@}
+
+ //!@name Assignment operators
+ //@{
+
+ //! Assignment with move semantics.
+ /*! \param rhs Source of the assignment. It will become a null value after assignment.
+ */
+ GenericValue& operator=(GenericValue& rhs) RAPIDJSON_NOEXCEPT {
+ RAPIDJSON_ASSERT(this != &rhs);
+ this->~GenericValue();
+ RawAssign(rhs);
+ return *this;
+ }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+ //! Move assignment in C++11
+ /*! \param rhs Source of the assignment.
+ \note \c rhs.Move() returns \c rhs as an lvalue reference, so this forwards to
+ the move-semantics assignment taking \c GenericValue&.
+ */
+ GenericValue& operator=(GenericValue&& rhs) RAPIDJSON_NOEXCEPT {
+ return *this = rhs.Move();
+ }
+#endif
+
+ //! Assignment of constant string reference (no copy)
+ /*! \param str Constant string reference to be assigned
+ \note This overload is needed to avoid clashes with the generic primitive type assignment overload below.
+ \see GenericStringRef, operator=(T)
+ */
+ GenericValue& operator=(StringRefType str) RAPIDJSON_NOEXCEPT {
+ // Wrap the string reference in a temporary value and move-assign from it.
+ GenericValue wrapped(str);
+ return *this = wrapped;
+ }
+
+ //! Assignment with primitive types.
+ /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t
+ \param value The value to be assigned.
+
+ \note The source type \c T explicitly disallows all pointer types,
+ especially (\c const) \ref Ch*. This helps avoiding implicitly
+ referencing character strings with insufficient lifetime, use
+ \ref SetString(const Ch*, Allocator&) (for copying) or
+ \ref StringRef() (to explicitly mark the pointer as constant) instead.
+ All other pointer types would implicitly convert to \c bool,
+ use \ref SetBool() instead.
+ */
+ template <typename T>
+ RAPIDJSON_DISABLEIF_RETURN((internal::IsPointer<T>), (GenericValue&))
+ operator=(T value) {
+ // Build a value from the primitive, then move-assign from that temporary.
+ GenericValue converted(value);
+ return *this = converted;
+ }
+
+ //! Deep-copy assignment from Value
+ /*! Assigns a \b copy of the Value to the current Value object
+ \tparam SourceAllocator Allocator type of \c rhs
+ \param rhs Value to copy from (read-only)
+ \param allocator Allocator to use for copying
+ \return Reference to \c *this.
+ \pre \c rhs is not \c *this itself.
+ \note \c rhs may be a value stored inside \c *this: the copy is completed before
+ the old contents of \c *this are destroyed.
+ */
+ template <typename SourceAllocator>
+ GenericValue& CopyFrom(const GenericValue<Encoding, SourceAllocator>& rhs, Allocator& allocator) {
+ RAPIDJSON_ASSERT(static_cast<void*>(this) != static_cast<void const*>(&rhs));
+ // Copy first: destroying *this up front would also destroy rhs when rhs lives in
+ // storage owned by *this, and the copy would then read released memory.
+ GenericValue copy(rhs, allocator);
+ this->~GenericValue();
+ RawAssign(copy);
+ return *this;
+ }
+
+ //! Exchange the contents of this value with those of other.
+ /*!
+ \param other Another value.
+ \note Constant complexity.
+ */
+ GenericValue& Swap(GenericValue& other) RAPIDJSON_NOEXCEPT {
+ // Three raw transfers through a scratch value: this -> scratch, other -> this, scratch -> other.
+ GenericValue scratch;
+ scratch.RawAssign(*this);
+ this->RawAssign(other);
+ other.RawAssign(scratch);
+ return *this;
+ }
+
+ //! free-standing swap function helper
+ /*!
+ Helper function to enable support for common swap implementation pattern based on \c std::swap:
+ \code
+ void swap(MyClass& a, MyClass& b) {
+ using std::swap;
+ swap(a.value, b.value);
+ // ...
+ }
+ \endcode
+ \see Swap()
+ */
+ friend inline void swap(GenericValue& a, GenericValue& b) RAPIDJSON_NOEXCEPT {
+ a.Swap(b); // delegate to the member Swap()
+ }
+
+ //! Prepare Value for move semantics
+ /*! Performs no work itself: it only yields an lvalue reference to this value, which lets
+ a temporary or rvalue be passed to the overloads taking \c GenericValue&.
+ \return *this */
+ GenericValue& Move() RAPIDJSON_NOEXCEPT { return *this; }
+ //@}
+
+ //!@name Equal-to and not-equal-to operators
+ //@{
+ //! Equal-to operator
+ /*!
+ \note If an object contains duplicated named member, comparing equality with any object is always \c false.
+ \note Linear time complexity (number of all values in the subtree and total lengths of all strings).
+ */
+ template <typename SourceAllocator>
+ bool operator==(const GenericValue<Encoding, SourceAllocator>& rhs) const {
+ typedef GenericValue<Encoding, SourceAllocator> RhsType;
+ // Values of different JSON types are never equal.
+ if (GetType() != rhs.GetType())
+ return false;
+
+ switch (GetType()) {
+ case kObjectType: // Warning: O(n^2) inner-loop
+ if (data_.o.size != rhs.data_.o.size)
+ return false;
+ // Member order is irrelevant: each left member is looked up by name on the right.
+ for (ConstMemberIterator lhsMemberItr = MemberBegin(); lhsMemberItr != MemberEnd(); ++lhsMemberItr) {
+ typename RhsType::ConstMemberIterator rhsMemberItr = rhs.FindMember(lhsMemberItr->name);
+ if (rhsMemberItr == rhs.MemberEnd() || lhsMemberItr->value != rhsMemberItr->value)
+ return false;
+ }
+ return true;
+
+ case kArrayType:
+ if (data_.a.size != rhs.data_.a.size)
+ return false;
+ // Arrays are compared element by element, in order.
+ for (SizeType i = 0; i < data_.a.size; i++)
+ if ((*this)[i] != rhs[i])
+ return false;
+ return true;
+
+ case kStringType:
+ return StringEqual(rhs);
+
+ case kNumberType:
+ // If either side is stored as a double, compare as doubles; otherwise compare the raw 64-bit payloads.
+ if (IsDouble() || rhs.IsDouble()) {
+ double a = GetDouble(); // May convert from integer to double.
+ double b = rhs.GetDouble(); // Ditto
+ return a >= b && a <= b; // Prevent -Wfloat-equal
+ }
+ else
+ return data_.n.u64 == rhs.data_.n.u64;
+
+ default:
+ // Remaining types carry no payload to compare, so a matching type is enough.
+ return true;
+ }
+ }
+
+ //! Equal-to operator with const C-string pointer
+ /*! Wraps \c rhs in a temporary string-reference value and compares against it.
+ */
+ bool operator==(const Ch* rhs) const { return *this == GenericValue(StringRef(rhs)); }
+
+#if RAPIDJSON_HAS_STDSTRING
+ //! Equal-to operator with string object
+ /*! \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
+ */
+ bool operator==(const std::basic_string<Ch>& rhs) const { return *this == GenericValue(StringRef(rhs)); }
+#endif
+
+ //! Equal-to operator with primitive types
+ /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c true, \c false
+ \note Disabled for pointer types and for GenericValue itself; the comparison is made against a temporary value built from \c rhs.
+ */
+ template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>,internal::IsGenericValue<T> >), (bool)) operator==(const T& rhs) const { return *this == GenericValue(rhs); }
+
+ //! Not-equal-to operator
+ /*! \return !(*this == rhs)
+ */
+ template <typename SourceAllocator>
+ bool operator!=(const GenericValue<Encoding, SourceAllocator>& rhs) const { return !(*this == rhs); }
+
+ //! Not-equal-to operator with const C-string pointer
+ /*! \return !(*this == rhs)
+ */
+ bool operator!=(const Ch* rhs) const { return !(*this == rhs); }
+
+ //! Not-equal-to operator with arbitrary types
+ /*! \return !(*this == rhs)
+ \note Disabled when \c T is a GenericValue; the template overload above handles that case.
+ */
+ template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>), (bool)) operator!=(const T& rhs) const { return !(*this == rhs); }
+
+ //! Equal-to operator with arbitrary types (symmetric version)
+ /*! \return (rhs == lhs)
+ \note Lets a non-GenericValue operand appear on the left-hand side.
+ */
+ template <typename T> friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>), (bool)) operator==(const T& lhs, const GenericValue& rhs) { return rhs == lhs; }
+
+ //! Not-Equal-to operator with arbitrary types (symmetric version)
+ /*! \return !(rhs == lhs)
+ \note Lets a non-GenericValue operand appear on the left-hand side.
+ */
+ template <typename T> friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>), (bool)) operator!=(const T& lhs, const GenericValue& rhs) { return !(rhs == lhs); }
+ //@}
+
+ //!@name Type
+ //@{
+
+ //! Get the type of the value: the flag word masked with kTypeMask.
+ Type GetType() const { return static_cast<Type>(data_.f.flags & kTypeMask); }
+ // The predicates below come in two forms: IsNull, IsFalse, IsTrue, IsObject and IsArray
+ // compare the whole flag word against one constant, while the others test whether a
+ // particular flag bit is set.
+ bool IsNull() const { return data_.f.flags == kNullFlag; }
+ bool IsFalse() const { return data_.f.flags == kFalseFlag; }
+ bool IsTrue() const { return data_.f.flags == kTrueFlag; }
+ bool IsBool() const { return (data_.f.flags & kBoolFlag) != 0; }
+ bool IsObject() const { return data_.f.flags == kObjectFlag; }
+ bool IsArray() const { return data_.f.flags == kArrayFlag; }
+ bool IsNumber() const { return (data_.f.flags & kNumberFlag) != 0; }
+ bool IsInt() const { return (data_.f.flags & kIntFlag) != 0; }
+ bool IsUint() const { return (data_.f.flags & kUintFlag) != 0; }
+ bool IsInt64() const { return (data_.f.flags & kInt64Flag) != 0; }
+ bool IsUint64() const { return (data_.f.flags & kUint64Flag) != 0; }
+ bool IsDouble() const { return (data_.f.flags & kDoubleFlag) != 0; }
+ bool IsString() const { return (data_.f.flags & kStringFlag) != 0; }
+
+ // Checks whether a number can be losslessly converted to a double.
+ // Returns false for non-numbers. For 64-bit integers the value is converted to double
+ // and back, and must survive the round trip; every other number returns true.
+ bool IsLosslessDouble() const {
+ if (!IsNumber()) return false;
+ if (IsUint64()) {
+ uint64_t u = GetUint64();
+ // NOTE(review): volatile presumably keeps the converted value from being held at
+ // a precision higher than double - confirm.
+ volatile double d = static_cast<double>(u);
+ // The range checks run before the cast back to uint64_t in the last operand.
+ return (d >= 0.0)
+ && (d < static_cast<double>(std::numeric_limits<uint64_t>::max()))
+ && (u == static_cast<uint64_t>(d));
+ }
+ if (IsInt64()) {
+ int64_t i = GetInt64();
+ volatile double d = static_cast<double>(i);
+ // Same scheme as above, with the int64_t limits.
+ return (d >= static_cast<double>(std::numeric_limits<int64_t>::min()))
+ && (d < static_cast<double>(std::numeric_limits<int64_t>::max()))
+ && (i == static_cast<int64_t>(d));
+ }
+ return true; // double, int, uint are always lossless
+ }
+
+ // Checks whether a number is a float (possibly lossy): it must be stored as a
+ // double and lie within the closed range [-3.4028234e38, 3.4028234e38].
+ bool IsFloat() const {
+ const bool storedAsDouble = (data_.f.flags & kDoubleFlag) != 0;
+ if (!storedAsDouble)
+ return false;
+ const double value = GetDouble();
+ const double bound = 3.4028234e38;
+ return value >= -bound && value <= bound;
+ }
+ // Checks whether a number can be losslessly converted to a float: the value, read as
+ // a double, must be within the float range and unchanged by a double->float->double round trip.
+ bool IsLosslessFloat() const {
+ if (!IsNumber())
+ return false;
+ const double original = GetDouble();
+ const double floatMax = static_cast<double>(std::numeric_limits<float>::max());
+ if (original < -floatMax || original > floatMax)
+ return false;
+ const double roundTripped = static_cast<double>(static_cast<float>(original));
+ return original >= roundTripped && original <= roundTripped; // Prevent -Wfloat-equal
+ }
+
+ //@}
+
+ //!@name Null
+ //@{
+
+ //! Set this value to null.
+ /*! Destroys the current contents, then default-constructs a new value in place.
+ \return The value itself for fluent API. */
+ GenericValue& SetNull() { this->~GenericValue(); new (this) GenericValue(); return *this; }
+
+ //@}
+
+ //!@name Bool
+ //@{
+
+ //! Get boolean value
+ /*! \pre IsBool() == true */
+ bool GetBool() const { RAPIDJSON_ASSERT(IsBool()); return data_.f.flags == kTrueFlag; }
+ //! Set boolean value
+ /*! \post IsBool() == true */
+ GenericValue& SetBool(bool b) { this->~GenericValue(); new (this) GenericValue(b); return *this; }
+
+ //@}
+
+ //!@name Object
+ //@{
+
+ //! Set this value as an empty object.
+ /*! Destroys the current contents, then constructs a value of kObjectType in place.
+ \post IsObject() == true */
+ GenericValue& SetObject() { this->~GenericValue(); new (this) GenericValue(kObjectType); return *this; }
+
+ //! Get the number of members in the object.
+ /*! \pre IsObject() == true */
+ SizeType MemberCount() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.size; }
+
+ //! Check whether the object is empty.
+ /*! \pre IsObject() == true */
+ bool ObjectEmpty() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.size == 0; }
+
+ //! Get a value from an object associated with the name.
+ /*! \pre IsObject() == true
+ \tparam T Either \c Ch or \c const \c Ch (template used for disambiguation with \ref operator[](SizeType))
+ \note In version 0.1x, if the member is not found, this function returns a null value. This makes issue 7.
+ Since 0.2, if the name is not correct, it will assert.
+ If user is unsure whether a member exists, user should use HasMember() first.
+ A better approach is to use FindMember().
+ \note Linear time complexity.
+ */
+ template <typename T>
+ RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch> >),(GenericValue&)) operator[](T* name) {
+ // Wrap the C string as a string-reference value and defer to the overload taking a GenericValue name.
+ GenericValue n(StringRef(name));
+ return (*this)[n];
+ }
+ // Const overload: casts away constness to reuse the non-const lookup and returns the result as const.
+ template <typename T>
+ RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch> >),(const GenericValue&)) operator[](T* name) const { return const_cast<GenericValue&>(*this)[name]; }
+
+ //! Get a value from an object associated with the name.
+ /*! \pre IsObject() == true
+ \tparam SourceAllocator Allocator of the \c name value
+
+ \note Compared to \ref operator[](T*), this version is faster because it does not need a StrLen().
+ And it can also handle strings with embedded null characters.
+
+ \note Linear time complexity.
+ */
+ template <typename SourceAllocator>
+ GenericValue& operator[](const GenericValue<Encoding, SourceAllocator>& name) {
+ MemberIterator member = FindMember(name);
+ if (member != MemberEnd())
+ return member->value;
+ else {
+ RAPIDJSON_ASSERT(false); // see above note
+
+ // This will generate -Wexit-time-destructors in clang
+ // static GenericValue NullValue;
+ // return NullValue;
+
+ // Use static buffer and placement-new to prevent destruction
+ // This point is reached only when RAPIDJSON_ASSERT does not stop execution; a
+ // default-constructed value is then built in the shared buffer on each such call.
+ // NOTE(review): a plain char array has no alignment guarantee for GenericValue, and
+ // the buffer is shared by all callers - confirm this is acceptable on all targets.
+ static char buffer[sizeof(GenericValue)];
+ return *new (buffer) GenericValue();
+ }
+ }
+ // Const overload: casts away constness to reuse the non-const lookup and returns the result as const.
+ template <typename SourceAllocator>
+ const GenericValue& operator[](const GenericValue<Encoding, SourceAllocator>& name) const { return const_cast<GenericValue&>(*this)[name]; }
+
+#if RAPIDJSON_HAS_STDSTRING
+ //! Get a value from an object associated with name (string object).
+ /*! \pre IsObject() == true
+ \note Wraps \c name in a temporary string-reference value and uses the GenericValue-name overload.
+ */
+ GenericValue& operator[](const std::basic_string<Ch>& name) { return (*this)[GenericValue(StringRef(name))]; }
+ //! Const version of the string-object lookup above.
+ const GenericValue& operator[](const std::basic_string<Ch>& name) const { return (*this)[GenericValue(StringRef(name))]; }
+#endif
+
+ //! Const member iterator
+ /*! \pre IsObject() == true
+ \return Iterator built from the start of the member array. */
+ ConstMemberIterator MemberBegin() const { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(GetMembersPointer()); }
+ //! Const \em past-the-end member iterator
+ /*! \pre IsObject() == true
+ \return Iterator built from the member array start plus the member count. */
+ ConstMemberIterator MemberEnd() const { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(GetMembersPointer() + data_.o.size); }
+ //! Member iterator
+ /*! \pre IsObject() == true
+ \return Iterator built from the start of the member array. */
+ MemberIterator MemberBegin() { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(GetMembersPointer()); }
+ //! \em Past-the-end member iterator
+ /*! \pre IsObject() == true
+ \return Iterator built from the member array start plus the member count. */
+ MemberIterator MemberEnd() { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(GetMembersPointer() + data_.o.size); }
+
+ //! Check whether a member exists in the object.
+ /*!
+ \param name Member name to be searched.
+ \pre IsObject() == true
+ \return Whether a member with that name exists.
+ \note It is better to use FindMember() directly if you need to obtain the value as well.
+ \note Linear time complexity.
+ */
+ bool HasMember(const Ch* name) const { return FindMember(name) != MemberEnd(); }
+
+#if RAPIDJSON_HAS_STDSTRING
+ //! Check whether a member exists in the object with string object.
+ /*!
+ \param name Member name to be searched.
+ \pre IsObject() == true
+ \return Whether a member with that name exists.
+ \note It is better to use FindMember() directly if you need to obtain the value as well.
+ \note Linear time complexity.
+ */
+ bool HasMember(const std::basic_string<Ch>& name) const { return FindMember(name) != MemberEnd(); }
+#endif
+
+ //! Check whether a member exists in the object with GenericValue name.
+ /*!
+ This version is faster because it does not need a StrLen(). It can also handle strings with embedded null characters.
+ \param name Member name to be searched.
+ \pre IsObject() == true
+ \return Whether a member with that name exists.
+ \note It is better to use FindMember() directly if you need to obtain the value as well.
+ \note Linear time complexity.
+ */
+ template <typename SourceAllocator>
+ bool HasMember(const GenericValue<Encoding, SourceAllocator>& name) const { return FindMember(name) != MemberEnd(); }
+
+ //! Find member by name.
+ /*!
+ \param name Member name to be searched.
+ \pre IsObject() == true
+ \return Iterator to member, if it exists.
+ Otherwise returns \ref MemberEnd().
+
+ \note Earlier versions of Rapidjson returned a \c NULL pointer, in case
+ the requested member doesn't exist. For consistency with e.g.
+ \c std::map, this has been changed to MemberEnd() now.
+ \note Linear time complexity.
+ */
+ MemberIterator FindMember(const Ch* name) {
+ // Wrap the C string as a string-reference value and defer to the overload taking a GenericValue name.
+ GenericValue n(StringRef(name));
+ return FindMember(n);
+ }
+
+ // Const overload: reuses the non-const search; the result is returned as a ConstMemberIterator.
+ ConstMemberIterator FindMember(const Ch* name) const { return const_cast<GenericValue&>(*this).FindMember(name); }
+
+ //! Find member by name.
+ /*!
+ This version is faster because it does not need a StrLen(). It can also handle string with null character.
+ \param name Member name to be searched.
+ \pre IsObject() == true
+ \return Iterator to member, if it exists.
+ Otherwise returns \ref MemberEnd().
+
+ \note Earlier versions of Rapidjson returned a \c NULL pointer, in case
+ the requested member doesn't exist. For consistency with e.g.
+ \c std::map, this has been changed to MemberEnd() now.
+ \note Linear time complexity.
+ */
+ template <typename SourceAllocator>
+ MemberIterator FindMember(const GenericValue<Encoding, SourceAllocator>& name) {
+ RAPIDJSON_ASSERT(IsObject());
+ RAPIDJSON_ASSERT(name.IsString());
+ // Linear scan: stop at the first member whose name equals the requested one,
+ // or run off the end and return MemberEnd().
+ MemberIterator cursor = MemberBegin();
+ while (cursor != MemberEnd() && !name.StringEqual(cursor->name))
+ ++cursor;
+ return cursor;
+ }
+ // Const overload: reuses the non-const search; the result is returned as a ConstMemberIterator.
+ template <typename SourceAllocator>
+ ConstMemberIterator FindMember(const GenericValue<Encoding, SourceAllocator>& name) const {
+ return const_cast<GenericValue&>(*this).FindMember(name);
+ }
+
+#if RAPIDJSON_HAS_STDSTRING
+ //! Find member by string object name.
+ /*!
+ \param name Member name to be searched.
+ \pre IsObject() == true
+ \return Iterator to member, if it exists.
+ Otherwise returns \ref MemberEnd().
+ \note Wraps \c name in a temporary string-reference value and uses the GenericValue-name overload.
+ */
+ MemberIterator FindMember(const std::basic_string<Ch>& name) { return FindMember(GenericValue(StringRef(name))); }
+ //! Const version of the string-object search above.
+ ConstMemberIterator FindMember(const std::basic_string<Ch>& name) const { return FindMember(GenericValue(StringRef(name))); }
+#endif
+
+ //! Add a member (name-value pair) to the object.
+ /*! \param name A string value as name of member.
+ \param value Value of any type.
+ \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+ \return The value itself for fluent API.
+ \note The ownership of \c name and \c value will be transferred to this object on success.
+ \pre IsObject() && name.IsString()
+ \post name.IsNull() && value.IsNull()
+ \note Amortized Constant time complexity.
+ */
+ GenericValue& AddMember(GenericValue& name, GenericValue& value, Allocator& allocator) {
+ RAPIDJSON_ASSERT(IsObject());
+ RAPIDJSON_ASSERT(name.IsString());
+
+ ObjectData& o = data_.o;
+ // Make room first when the member array is full.
+ if (o.size >= o.capacity) {
+ if (o.capacity == 0) {
+ // Nothing allocated yet: start with the default capacity.
+ o.capacity = kDefaultObjectCapacity;
+ SetMembersPointer(reinterpret_cast<Member*>(allocator.Malloc(o.capacity * sizeof(Member))));
+ }
+ else {
+ SizeType oldCapacity = o.capacity;
+ o.capacity += (oldCapacity + 1) / 2; // grow by factor 1.5
+ SetMembersPointer(reinterpret_cast<Member*>(allocator.Realloc(GetMembersPointer(), oldCapacity * sizeof(Member), o.capacity * sizeof(Member))));
+ }
+ }
+ // Append at the end by raw-assigning name and value into the next slot.
+ // No check for an existing member with the same name is made here.
+ Member* members = GetMembersPointer();
+ members[o.size].name.RawAssign(name);
+ members[o.size].value.RawAssign(value);
+ o.size++;
+ return *this;
+ }
+
+ //! Add a constant string value as member (name-value pair) to the object.
+ /*! \param name A string value as name of member.
+ \param value constant string reference as value of member.
+ \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+ \return The value itself for fluent API.
+ \pre IsObject()
+ \note This overload is needed to avoid clashes with the generic primitive type AddMember(GenericValue&,T,Allocator&) overload below.
+ \note Amortized Constant time complexity.
+ */
+ GenericValue& AddMember(GenericValue& name, StringRefType value, Allocator& allocator) {
+ // Turn the string reference into a value, then use the (name, value) overload.
+ GenericValue wrapped(value);
+ return AddMember(name, wrapped, allocator);
+ }
+
+#if RAPIDJSON_HAS_STDSTRING
+ //! Add a string object as member (name-value pair) to the object.
+ /*! \param name A string value as name of member.
+ \param value String object used as value of member; the member value is constructed from it together with \c allocator.
+ \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+ \return The value itself for fluent API.
+ \pre IsObject()
+ \note This overload is needed to avoid clashes with the generic primitive type AddMember(GenericValue&,T,Allocator&) overload below.
+ \note Amortized Constant time complexity.
+ */
+ GenericValue& AddMember(GenericValue& name, std::basic_string<Ch>& value, Allocator& allocator) {
+ // Build the value with the allocator-taking string constructor, then use the (name, value) overload.
+ GenericValue v(value, allocator);
+ return AddMember(name, v, allocator);
+ }
+#endif
+
+ //! Add any primitive value as member (name-value pair) to the object.
+ /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t
+ \param name A string value as name of member.
+ \param value Value of primitive type \c T as value of member
+ \param allocator Allocator for reallocating memory. Commonly use GenericDocument::GetAllocator().
+ \return The value itself for fluent API.
+ \pre IsObject()
+
+ \note The source type \c T explicitly disallows all pointer types,
+ especially (\c const) \ref Ch*. This helps avoiding implicitly
+ referencing character strings with insufficient lifetime, use
+ \ref AddMember(StringRefType, GenericValue&, Allocator&) or \ref
+ AddMember(StringRefType, StringRefType, Allocator&).
+ All other pointer types would implicitly convert to \c bool,
+ use an explicit cast instead, if needed.
+ \note Amortized Constant time complexity.
+ */
+ template <typename T>
+ RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericValue&))
+ AddMember(GenericValue& name, T value, Allocator& allocator) {
+ // Turn the primitive into a value, then use the (name, value) overload.
+ GenericValue converted(value);
+ return AddMember(name, converted, allocator);
+ }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+ // Rvalue-reference overloads. Inside each body the named parameters are lvalues, so the
+ // calls below resolve to the overloads taking GenericValue& for name and value.
+ GenericValue& AddMember(GenericValue&& name, GenericValue&& value, Allocator& allocator) {
+ return AddMember(name, value, allocator);
+ }
+ GenericValue& AddMember(GenericValue&& name, GenericValue& value, Allocator& allocator) {
+ return AddMember(name, value, allocator);
+ }
+ GenericValue& AddMember(GenericValue& name, GenericValue&& value, Allocator& allocator) {
+ return AddMember(name, value, allocator);
+ }
+ GenericValue& AddMember(StringRefType name, GenericValue&& value, Allocator& allocator) {
+ // The name is first turned into a value so it can be passed by reference.
+ GenericValue n(name);
+ return AddMember(n, value, allocator);
+ }
+#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
+
+
+ //! Add a member (name-value pair) to the object.
+ /*! \param name A constant string reference as name of member.
+ \param value Value of any type.
+ \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+ \return The value itself for fluent API.
+ \note The ownership of \c value will be transferred to this object on success.
+ \pre IsObject()
+ \post value.IsNull()
+ \note Amortized Constant time complexity.
+ */
+ GenericValue& AddMember(StringRefType name, GenericValue& value, Allocator& allocator) {
+ // Turn the name reference into a value, then use the (name, value) overload.
+ GenericValue key(name);
+ return AddMember(key, value, allocator);
+ }
+
+ //! Add a constant string value as member (name-value pair) to the object.
+ /*! \param name A constant string reference as name of member.
+ \param value constant string reference as value of member.
+ \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+ \return The value itself for fluent API.
+ \pre IsObject()
+ \note This overload is needed to avoid clashes with the generic primitive type AddMember(StringRefType,T,Allocator&) overload below.
+ \note Amortized Constant time complexity.
+ */
+ GenericValue& AddMember(StringRefType name, StringRefType value, Allocator& allocator) {
+ // Turn the value reference into a value; the name is converted by the overload called below.
+ GenericValue wrapped(value);
+ return AddMember(name, wrapped, allocator);
+ }
+
+ //! Add any primitive value as member (name-value pair) to the object.
+ /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t
+ \param name A constant string reference as name of member.
+ \param value Value of primitive type \c T as value of member
+ \param allocator Allocator for reallocating memory. Commonly use GenericDocument::GetAllocator().
+ \return The value itself for fluent API.
+ \pre IsObject()
+
+ \note The source type \c T explicitly disallows all pointer types,
+ especially (\c const) \ref Ch*. This helps avoiding implicitly
+ referencing character strings with insufficient lifetime, use
+ \ref AddMember(StringRefType, GenericValue&, Allocator&) or \ref
+ AddMember(StringRefType, StringRefType, Allocator&).
+ All other pointer types would implicitly convert to \c bool,
+ use an explicit cast instead, if needed.
+ \note Amortized Constant time complexity.
+ */
+ template <typename T>
+ RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericValue&))
+ AddMember(StringRefType name, T value, Allocator& allocator) {
+ // Turn the name reference into a value; the primitive is converted by the overload called below.
+ GenericValue key(name);
+ return AddMember(key, value, allocator);
+ }
+
+ //! Remove all members in the object.
+ /*! This function does not deallocate memory in the object, i.e. the capacity is unchanged.
+ \pre IsObject() == true
+ \note Linear time complexity.
+ */
+ void RemoveAllMembers() {
+ RAPIDJSON_ASSERT(IsObject());
+ // Destroy each member in place; the member buffer itself is kept.
+ for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m)
+ m->~Member();
+ data_.o.size = 0;
+ }
+
+ //! Remove a member in object by its name.
+ /*! \param name Name of member to be removed.
+ \return Whether the member existed.
+ \note This function may reorder the object members. Use \ref
+ EraseMember(ConstMemberIterator) if you need to preserve the
+ relative order of the remaining members.
+ \note Linear time complexity.
+ */
+ bool RemoveMember(const Ch* name) {
+ // Wrap the C string as a string-reference value and use the GenericValue-name overload.
+ GenericValue key(StringRef(name));
+ return RemoveMember(key);
+ }
+
+#if RAPIDJSON_HAS_STDSTRING
+ //! Remove a member in object by its name (string object overload).
+ /*! Wraps \c name in a temporary string-reference value and uses the GenericValue-name overload.
+ \return Whether the member existed.
+ */
+ bool RemoveMember(const std::basic_string<Ch>& name) { return RemoveMember(GenericValue(StringRef(name))); }
+#endif
+
+ template <typename SourceAllocator>
+ bool RemoveMember(const GenericValue<Encoding, SourceAllocator>& name) {
+ // Look the member up by name; remove it only when the search succeeded.
+ MemberIterator found = FindMember(name);
+ const bool present = (found != MemberEnd());
+ if (present)
+ RemoveMember(found);
+ return present;
+ }
+
+ //! Remove a member in object by iterator.
+ /*! \param m member iterator (obtained by FindMember() or MemberBegin()).
+ \return the new iterator after removal.
+ \note This function may reorder the object members. Use \ref
+ EraseMember(ConstMemberIterator) if you need to preserve the
+ relative order of the remaining members.
+ \note Constant time complexity.
+ */
+ MemberIterator RemoveMember(MemberIterator m) {
+ RAPIDJSON_ASSERT(IsObject());
+ RAPIDJSON_ASSERT(data_.o.size > 0);
+ RAPIDJSON_ASSERT(GetMembersPointer() != 0);
+ RAPIDJSON_ASSERT(m >= MemberBegin() && m < MemberEnd());
+
+ // Fill the hole with the last member instead of shifting the tail; this is what
+ // makes the removal constant-time and why member order may change.
+ MemberIterator last(GetMembersPointer() + (data_.o.size - 1));
+ if (data_.o.size > 1 && m != last)
+ *m = *last; // Move the last one to this place
+ else
+ m->~Member(); // m is the last member (possibly the only one), just destroy
+ --data_.o.size;
+ // m is returned unchanged: it refers to the moved-in member, or equals the new end when the last member was removed.
+ return m;
+ }
+
+ //! Remove a member from an object by iterator.
+ /*! \param pos iterator to the member to remove
+ \pre IsObject() == true && \ref MemberBegin() <= \c pos < \ref MemberEnd()
+ \return Iterator following the removed element.
+ If the iterator \c pos refers to the last element, the \ref MemberEnd() iterator is returned.
+ \note This function preserves the relative order of the remaining object
+ members. If you do not need this, use the more efficient \ref RemoveMember(MemberIterator).
+ \note Linear time complexity.
+ */
+ MemberIterator EraseMember(ConstMemberIterator pos) {
+ // Erase the single-element range [pos, pos + 1).
+ return EraseMember(pos, pos +1);
+ }
+
+ //! Remove members in the range [first, last) from an object.
+ /*! \param first iterator to the first member to remove
+ \param last iterator following the last member to remove
+ \pre IsObject() == true && \ref MemberBegin() <= \c first <= \c last <= \ref MemberEnd()
+ \return Iterator following the last removed element.
+ \note This function preserves the relative order of the remaining object
+ members.
+ \note Linear time complexity.
+ */
+ MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last) {
+ RAPIDJSON_ASSERT(IsObject());
+ RAPIDJSON_ASSERT(data_.o.size > 0);
+ RAPIDJSON_ASSERT(GetMembersPointer() != 0);
+ RAPIDJSON_ASSERT(first >= MemberBegin());
+ RAPIDJSON_ASSERT(first <= last);
+ RAPIDJSON_ASSERT(last <= MemberEnd());
+
+ // Rebuild a mutable iterator for 'first' from its offset within the member array.
+ MemberIterator pos = MemberBegin() + (first - MemberBegin());
+ // Destroy the members being erased.
+ for (MemberIterator itr = pos; itr != last; ++itr)
+ itr->~Member();
+ // Close the gap by moving the remaining tail [last, end) down as raw bytes, which keeps the relative order.
+ std::memmove(&*pos, &*last, static_cast<size_t>(MemberEnd() - last) * sizeof(Member));
+ data_.o.size -= static_cast<SizeType>(last - first);
+ return pos;
+ }
+
+ //! Erase a member in object by its name.
+ /*! \param name Name of member to be removed.
+ \return Whether the member existed.
+ \note Linear time complexity.
+ */
+ bool EraseMember(const Ch* name) {
+ // Wrap the C string as a string-reference value and use the GenericValue-name overload.
+ GenericValue key(StringRef(name));
+ return EraseMember(key);
+ }
+
+#if RAPIDJSON_HAS_STDSTRING
+ //! Erase a member in object by its name (string object overload).
+ /*! Wraps \c name in a temporary string-reference value and uses the GenericValue-name overload.
+ \return Whether the member existed.
+ */
+ bool EraseMember(const std::basic_string<Ch>& name) { return EraseMember(GenericValue(StringRef(name))); }
+#endif
+
+ template <typename SourceAllocator>
+ bool EraseMember(const GenericValue<Encoding, SourceAllocator>& name) {
+ // Look the member up by name; erase it only when the search succeeded.
+ MemberIterator found = FindMember(name);
+ const bool present = (found != MemberEnd());
+ if (present)
+ EraseMember(found);
+ return present;
+ }
+
+ //! Get an Object helper constructed from this value.
+ /*! \pre IsObject() == true */
+ Object GetObject() { RAPIDJSON_ASSERT(IsObject()); return Object(*this); }
+ //! Get a ConstObject helper constructed from this value.
+ /*! \pre IsObject() == true */
+ ConstObject GetObject() const { RAPIDJSON_ASSERT(IsObject()); return ConstObject(*this); }
+
+ //@}
+
+ //!@name Array
+ //@{
+
+ //! Set this value as an empty array.
+ /*! Destroys the current contents, then constructs a value of kArrayType in place.
+ \post IsArray() == true */
+ GenericValue& SetArray() { this->~GenericValue(); new (this) GenericValue(kArrayType); return *this; }
+
+ //! Get the number of elements in array.
+ /*! \pre IsArray() == true */
+ SizeType Size() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size; }
+
+ //! Get the capacity of array.
+ /*! \pre IsArray() == true */
+ SizeType Capacity() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.capacity; }
+
+ //! Check whether the array is empty.
+ /*! \pre IsArray() == true */
+ bool Empty() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size == 0; }
+
+ //! Remove all elements in the array.
+ /*! This function does not deallocate memory in the array, i.e. the capacity is unchanged.
+ \pre IsArray() == true
+ \note Linear time complexity.
+ */
+ void Clear() {
+ RAPIDJSON_ASSERT(IsArray());
+ // Destroy each element in place; the element buffer itself is kept.
+ GenericValue* e = GetElementsPointer();
+ for (GenericValue* v = e; v != e + data_.a.size; ++v)
+ v->~GenericValue();
+ data_.a.size = 0;
+ }
+
+ //! Get an element from array by index.
+ /*! \pre IsArray() == true
+ \param index Zero-based index of element.
+ \see operator[](T*)
+ */
+ GenericValue& operator[](SizeType index) {
+ RAPIDJSON_ASSERT(IsArray());
+ // Bounds are checked only through the assertion below.
+ RAPIDJSON_ASSERT(index < data_.a.size);
+ return GetElementsPointer()[index];
+ }
+ // Const overload: casts away constness to reuse the non-const accessor and returns the result as const.
+ const GenericValue& operator[](SizeType index) const { return const_cast<GenericValue&>(*this)[index]; }
+
+ //! Element iterator
+ /*! \pre IsArray() == true
+ \return Pointer to the first element of the element array. */
+ ValueIterator Begin() { RAPIDJSON_ASSERT(IsArray()); return GetElementsPointer(); }
+ //! \em Past-the-end element iterator
+ /*! \pre IsArray() == true
+ \return Pointer one past the last element. */
+ ValueIterator End() { RAPIDJSON_ASSERT(IsArray()); return GetElementsPointer() + data_.a.size; }
+ //! Constant element iterator
+ /*! \pre IsArray() == true
+ \note Forwards to the non-const Begin(), which performs the IsArray() assertion. */
+ ConstValueIterator Begin() const { return const_cast<GenericValue&>(*this).Begin(); }
+ //! Constant \em past-the-end element iterator
+ /*! \pre IsArray() == true
+ \note Forwards to the non-const End(), which performs the IsArray() assertion. */
+ ConstValueIterator End() const { return const_cast<GenericValue&>(*this).End(); }
+
+ //! Request the array to have enough capacity to store elements.
+ /*! \param newCapacity The capacity that the array at least need to have.
+ \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+ \return The value itself for fluent API.
+ \note Linear time complexity.
+ */
+ GenericValue& Reserve(SizeType newCapacity, Allocator &allocator) {
+ RAPIDJSON_ASSERT(IsArray());
+ // Only ever grows: a request not larger than the current capacity is ignored.
+ if (newCapacity > data_.a.capacity) {
+ // Realloc receives the current pointer together with the old and new sizes in bytes.
+ SetElementsPointer(reinterpret_cast<GenericValue*>(allocator.Realloc(GetElementsPointer(), data_.a.capacity * sizeof(GenericValue), newCapacity * sizeof(GenericValue))));
+ data_.a.capacity = newCapacity;
+ }
+ return *this;
+ }
+
+ //! Append a GenericValue at the end of the array.
+ /*! \param value Value to be appended.
+ \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+ \pre IsArray() == true
+ \post value.IsNull() == true
+ \return The value itself for fluent API.
+ \note The ownership of \c value will be transferred to this array on success.
+ \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient.
+ \note Amortized constant time complexity.
+ */
+ GenericValue& PushBack(GenericValue& value, Allocator& allocator) {
+ RAPIDJSON_ASSERT(IsArray());
+ // When full, grow to the default capacity (if nothing is allocated yet) or by half of the current capacity, rounded up.
+ if (data_.a.size >= data_.a.capacity)
+ Reserve(data_.a.capacity == 0 ? kDefaultArrayCapacity : (data_.a.capacity + (data_.a.capacity + 1) / 2), allocator);
+ // Raw-assign the value into the next free slot and bump the size.
+ GetElementsPointer()[data_.a.size++].RawAssign(value);
+ return *this;
+ }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+ //! Append an rvalue GenericValue at the end of the array.
+ /*! Inside the body \c value is an lvalue, so the call resolves to PushBack(GenericValue&, Allocator&). */
+ GenericValue& PushBack(GenericValue&& value, Allocator& allocator) {
+ return PushBack(value, allocator);
+ }
+#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
+
+ //! Append a constant string reference at the end of the array.
+ /*! \param value Constant string reference to be appended.
+ \param allocator Allocator for reallocating memory. It must be the same one used previously. Commonly use GenericDocument::GetAllocator().
+ \pre IsArray() == true
+ \return The value itself for fluent API.
+ \note If the number of elements to be appended is known, calling Reserve() once beforehand may be more efficient.
+ \note Amortized constant time complexity.
+ \see GenericStringRef
+ */
+ GenericValue& PushBack(StringRefType value, Allocator& allocator) {
+     // Explicitly pick the templated overload, which wraps the reference in a temporary GenericValue.
+     return this->template PushBack<StringRefType>(value, allocator);
+ }
+
+ //! Append a primitive value at the end of the array.
+ /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t
+ \param value Value of primitive type T to be appended.
+ \param allocator Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+ \pre IsArray() == true
+ \return The value itself for fluent API.
+ \note If the number of elements to be appended is known, calling Reserve() once beforehand may be more efficient.
+
+ \note The source type \c T explicitly disallows all pointer types,
+ especially (\c const) \ref Ch*. This helps avoiding implicitly
+ referencing character strings with insufficient lifetime, use
+ \ref PushBack(GenericValue&, Allocator&) or \ref
+ PushBack(StringRefType, Allocator&).
+ All other pointer types would implicitly convert to \c bool,
+ use an explicit cast instead, if needed.
+ \note Amortized constant time complexity.
+ */
+ template <typename T>
+ RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericValue&))
+ PushBack(T value, Allocator& allocator) {
+     // Wrap the primitive in a temporary value, then append it through the GenericValue& overload.
+     GenericValue wrapped(value);
+     GenericValue& self = PushBack(wrapped, allocator);
+     return self;
+ }
+
+ //! Remove the last element in the array.
+ /*!
+ \note Constant time complexity.
+ */
+ GenericValue& PopBack() {
+     RAPIDJSON_ASSERT(IsArray());
+     RAPIDJSON_ASSERT(!Empty());
+     // Shrink first, then destroy the element that just fell off the end.
+     const SizeType lastIndex = --data_.a.size;
+     GenericValue* last = GetElementsPointer() + lastIndex;
+     last->~GenericValue();
+     return *this;
+ }
+
+ //! Remove an element of array by iterator.
+ /*!
+ \param pos iterator to the element to remove
+ \pre IsArray() == true && \ref Begin() <= \c pos < \ref End()
+ \return Iterator following the removed element. If the iterator pos refers to the last element, the End() iterator is returned.
+ \note Linear time complexity.
+ */
+ ValueIterator Erase(ConstValueIterator pos) {
+     // Single-element erase is the range erase [pos, pos + 1).
+     ConstValueIterator next = pos + 1;
+     return Erase(pos, next);
+ }
+
+ //! Remove elements in the range [first, last) of the array.
+ /*!
+ \param first iterator to the first element to remove
+ \param last iterator following the last element to remove
+ \pre IsArray() == true && \ref Begin() <= \c first <= \c last <= \ref End()
+ \return Iterator following the last removed element.
+ \note Linear time complexity.
+ */
+ ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) {
+     RAPIDJSON_ASSERT(IsArray());
+     RAPIDJSON_ASSERT(data_.a.size > 0);
+     RAPIDJSON_ASSERT(GetElementsPointer() != 0);
+     RAPIDJSON_ASSERT(first >= Begin());
+     RAPIDJSON_ASSERT(first <= last);
+     RAPIDJSON_ASSERT(last <= End());
+     // Rebuild a mutable iterator from the const one via its offset from Begin().
+     ValueIterator pos = Begin() + (first - Begin());
+     // Destroy the erased elements before their storage is overwritten.
+     for (ValueIterator itr = pos; itr != last; ++itr)
+         itr->~GenericValue();
+     // Slide the tail down over the gap. The raw byte move of a non-trivial type is intentional
+     // here; the void* cast states that explicitly and keeps GCC's -Wclass-memaccess quiet.
+     std::memmove(static_cast<void*>(pos), last, static_cast<size_t>(End() - last) * sizeof(GenericValue));
+     data_.a.size -= static_cast<SizeType>(last - first);
+     return pos;
+ }
+
+ //! Get an Array helper referring to this value. \pre IsArray() == true
+ Array GetArray() {
+     RAPIDJSON_ASSERT(IsArray());
+     return Array(*this);
+ }
+ //! Get a ConstArray helper referring to this value. \pre IsArray() == true
+ ConstArray GetArray() const {
+     RAPIDJSON_ASSERT(IsArray());
+     return ConstArray(*this);
+ }
+
+ //@}
+
+ //!@name Number
+ //@{
+
+ //! Get the value as \c int. Asserts that kIntFlag is set.
+ int GetInt() const {
+     RAPIDJSON_ASSERT(data_.f.flags & kIntFlag);
+     return data_.n.i.i;
+ }
+ //! Get the value as \c unsigned. Asserts that kUintFlag is set.
+ unsigned GetUint() const {
+     RAPIDJSON_ASSERT(data_.f.flags & kUintFlag);
+     return data_.n.u.u;
+ }
+ //! Get the value as \c int64_t. Asserts that kInt64Flag is set.
+ int64_t GetInt64() const {
+     RAPIDJSON_ASSERT(data_.f.flags & kInt64Flag);
+     return data_.n.i64;
+ }
+ //! Get the value as \c uint64_t. Asserts that kUint64Flag is set.
+ uint64_t GetUint64() const {
+     RAPIDJSON_ASSERT(data_.f.flags & kUint64Flag);
+     return data_.n.u64;
+ }
+
+ //! Get the value as double type.
+ /*! \note If the value is 64-bit integer type, it may lose precision. Use \c IsLosslessDouble() to check whether the conversion is lossless.
+ */
+ double GetDouble() const {
+     RAPIDJSON_ASSERT(IsNumber());
+     const uint16_t numberFlags = data_.f.flags;
+     // Checked from the exact representation down to the widest integer.
+     if ((numberFlags & kDoubleFlag) != 0)
+         return data_.n.d; // stored as double already
+     if ((numberFlags & kIntFlag) != 0)
+         return data_.n.i.i; // int -> double
+     if ((numberFlags & kUintFlag) != 0)
+         return data_.n.u.u; // unsigned -> double
+     if ((numberFlags & kInt64Flag) != 0)
+         return static_cast<double>(data_.n.i64); // may lose precision
+     RAPIDJSON_ASSERT((numberFlags & kUint64Flag) != 0);
+     return static_cast<double>(data_.n.u64); // may lose precision
+ }
+
+ //! Get the value as float type.
+ /*! \note If the value is 64-bit integer type, it may lose precision. Use \c IsLosslessFloat() to check whether the conversion is lossless.
+ */
+ float GetFloat() const {
+     // Narrow whatever GetDouble() produces.
+     const double wide = GetDouble();
+     return static_cast<float>(wide);
+ }
+
+ //! Replace the current content with an \c int (destroy, then construct in place).
+ GenericValue& SetInt(int i) {
+     this->~GenericValue();
+     new (this) GenericValue(i);
+     return *this;
+ }
+ //! Replace the current content with an \c unsigned.
+ GenericValue& SetUint(unsigned u) {
+     this->~GenericValue();
+     new (this) GenericValue(u);
+     return *this;
+ }
+ //! Replace the current content with an \c int64_t.
+ GenericValue& SetInt64(int64_t i64) {
+     this->~GenericValue();
+     new (this) GenericValue(i64);
+     return *this;
+ }
+ //! Replace the current content with a \c uint64_t.
+ GenericValue& SetUint64(uint64_t u64) {
+     this->~GenericValue();
+     new (this) GenericValue(u64);
+     return *this;
+ }
+ //! Replace the current content with a \c double.
+ GenericValue& SetDouble(double d) {
+     this->~GenericValue();
+     new (this) GenericValue(d);
+     return *this;
+ }
+ //! Replace the current content with a \c float, stored after widening to \c double.
+ GenericValue& SetFloat(float f) {
+     this->~GenericValue();
+     new (this) GenericValue(static_cast<double>(f));
+     return *this;
+ }
+
+ //@}
+
+ //!@name String
+ //@{
+
+ //! Get the string content: the inline buffer when kInlineStrFlag is set, the stored pointer otherwise.
+ const Ch* GetString() const {
+     RAPIDJSON_ASSERT(IsString());
+     if (data_.f.flags & kInlineStrFlag)
+         return data_.ss.str;
+     return GetStringPointer();
+ }
+
+ //! Get the length of string.
+ /*! Since rapidjson permits "\\u0000" in the json string, strlen(v.GetString()) may not be equal to v.GetStringLength().
+ */
+ SizeType GetStringLength() const {
+     RAPIDJSON_ASSERT(IsString());
+     // Inline strings encode their length in the buffer; others store it explicitly.
+     if (data_.f.flags & kInlineStrFlag)
+         return data_.ss.GetLength();
+     return data_.s.length;
+ }
+
+ //! Set this value as a string without copying source string.
+ /*! This version has better performance with supplied length, and also support string containing null character.
+ \param s source string pointer.
+ \param length The length of source string, excluding the trailing null terminator.
+ \return The value itself for fluent API.
+ \post IsString() == true && GetString() == s && GetStringLength() == length
+ \see SetString(StringRefType)
+ */
+ GenericValue& SetString(const Ch* s, SizeType length) {
+     // Build a non-owning reference and defer to the StringRefType overload.
+     return SetString(StringRef(s, length));
+ }
+
+ //! Set this value as a string without copying source string.
+ /*! \param s source string reference
+ \return The value itself for fluent API.
+ \post IsString() == true && GetString() == s && GetStringLength() == s.length
+ */
+ GenericValue& SetString(StringRefType s) {
+     // Drop the current content, then reference the caller's buffer without copying it.
+     this->~GenericValue();
+     SetStringRaw(s);
+     return *this;
+ }
+
+ //! Set this value as a string by copying from source string.
+ /*! This version has better performance with supplied length, and also support string containing null character.
+ \param s source string.
+ \param length The length of source string, excluding the trailing null terminator.
+ \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
+ \return The value itself for fluent API.
+ \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length
+ */
+ GenericValue& SetString(const Ch* s, SizeType length, Allocator& allocator) {
+     // Drop the current content, then store a private copy of the characters.
+     this->~GenericValue();
+     SetStringRaw(StringRef(s, length), allocator);
+     return *this;
+ }
+
+ //! Set this value as a string by copying from a zero-terminated source string.
+ /*! \param s source string; its length is measured with internal::StrLen().
+     \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
+     \return The value itself for fluent API.
+     \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == strlen(s)
+ */
+ GenericValue& SetString(const Ch* s, Allocator& allocator) {
+     const SizeType measured = internal::StrLen(s);
+     return SetString(s, measured, allocator);
+ }
+
+#if RAPIDJSON_HAS_STDSTRING
+ //! Set this value as a string by copying from a std::basic_string.
+ /*! \param s source string; all s.size() characters are copied.
+     \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
+     \return The value itself for fluent API.
+     \post IsString() == true && GetString() != s.data() && strcmp(GetString(),s.data()) == 0 && GetStringLength() == s.size()
+     \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
+ */
+ GenericValue& SetString(const std::basic_string<Ch>& s, Allocator& allocator) {
+     const SizeType length = SizeType(s.size());
+     return SetString(s.data(), length, allocator);
+ }
+#endif
+
+ //@}
+
+ //!@name Templated type checks and accessors
+ //@{
+
+ //! Templated version for checking whether this value is type T.
+ /*!
+ \tparam T Either \c bool, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c float, \c const \c char*, \c std::basic_string<Ch>
+ */
+ template <typename T>
+ bool Is() const {
+     return internal::TypeHelper<ValueType, T>::Is(*this);
+ }
+
+ //! Templated getter (const); dispatches to internal::TypeHelper for type T.
+ template <typename T>
+ T Get() const {
+     return internal::TypeHelper<ValueType, T>::Get(*this);
+ }
+
+ //! Templated getter (non-const); dispatches to internal::TypeHelper for type T.
+ template <typename T>
+ T Get() {
+     return internal::TypeHelper<ValueType, T>::Get(*this);
+ }
+
+ //! Templated setter for types that need no allocator; dispatches to internal::TypeHelper.
+ template<typename T>
+ ValueType& Set(const T& data) {
+     return internal::TypeHelper<ValueType, T>::Set(*this, data);
+ }
+
+ //! Templated setter that also hands an allocator to internal::TypeHelper.
+ template<typename T>
+ ValueType& Set(const T& data, AllocatorType& allocator) {
+     return internal::TypeHelper<ValueType, T>::Set(*this, data, allocator);
+ }
+
+ //@}
+
+ //! Generate events of this value to a Handler.
+ /*! This function adopts the GoF visitor pattern.
+ Typical usage is to output this JSON value as JSON text via Writer, which is a Handler.
+ It can also be used to deep clone this value via GenericDocument, which is also a Handler.
+ \tparam Handler type of handler.
+ \param handler An object implementing concept Handler.
+ */
+ template <typename Handler>
+ bool Accept(Handler& handler) const {
+     switch(GetType()) {
+     case kNullType:
+         return handler.Null();
+     case kFalseType:
+         return handler.Bool(false);
+     case kTrueType:
+         return handler.Bool(true);
+
+     case kObjectType: {
+         if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
+             return false;
+         ConstMemberIterator member = MemberBegin();
+         while (member != MemberEnd()) {
+             RAPIDJSON_ASSERT(member->name.IsString()); // User may change the type of name by MemberIterator.
+             const bool copyKey = (member->name.data_.f.flags & kCopyFlag) != 0;
+             if (RAPIDJSON_UNLIKELY(!handler.Key(member->name.GetString(), member->name.GetStringLength(), copyKey)))
+                 return false;
+             // Recurse into the member's value; the first refusal aborts the traversal.
+             if (RAPIDJSON_UNLIKELY(!member->value.Accept(handler)))
+                 return false;
+             ++member;
+         }
+         return handler.EndObject(data_.o.size);
+     }
+
+     case kArrayType: {
+         if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
+             return false;
+         const GenericValue* element = Begin();
+         while (element != End()) {
+             if (RAPIDJSON_UNLIKELY(!element->Accept(handler)))
+                 return false;
+             ++element;
+         }
+         return handler.EndArray(data_.a.size);
+     }
+
+     case kStringType: {
+         const bool copyString = (data_.f.flags & kCopyFlag) != 0;
+         return handler.String(GetString(), GetStringLength(), copyString);
+     }
+
+     default:
+         // Only numbers remain; report the value through the first matching representation.
+         RAPIDJSON_ASSERT(GetType() == kNumberType);
+         if (IsDouble())
+             return handler.Double(data_.n.d);
+         if (IsInt())
+             return handler.Int(data_.n.i.i);
+         if (IsUint())
+             return handler.Uint(data_.n.u.u);
+         if (IsInt64())
+             return handler.Int64(data_.n.i64);
+         return handler.Uint64(data_.n.u64);
+     }
+ }
+
+private:
+ template <typename, typename> friend class GenericValue;
+ template <typename, typename, typename> friend class GenericDocument;
+
+ enum { // bit values stored in data_.f.flags; each "initial" flag below ORs a k*Type value with these bits
+ kBoolFlag = 0x0008,
+ kNumberFlag = 0x0010,
+ kIntFlag = 0x0020, // asserted by GetInt()
+ kUintFlag = 0x0040, // asserted by GetUint()
+ kInt64Flag = 0x0080, // asserted by GetInt64()
+ kUint64Flag = 0x0100, // asserted by GetUint64()
+ kDoubleFlag = 0x0200, // GetDouble() returns data_.n.d directly when set
+ kStringFlag = 0x0400,
+ kCopyFlag = 0x0800, // forwarded by Accept() as the handler's `copy` argument
+ kInlineStrFlag = 0x1000, // GetString() reads the inline buffer data_.ss.str when set
+
+ // Initial flags of different types.
+ kNullFlag = kNullType,
+ kTrueFlag = kTrueType | kBoolFlag,
+ kFalseFlag = kFalseType | kBoolFlag,
+ kNumberIntFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag, // an int is also flagged as readable as int64
+ kNumberUintFlag = kNumberType | kNumberFlag | kUintFlag | kUint64Flag | kInt64Flag, // an unsigned is also flagged as uint64 and int64
+ kNumberInt64Flag = kNumberType | kNumberFlag | kInt64Flag,
+ kNumberUint64Flag = kNumberType | kNumberFlag | kUint64Flag,
+ kNumberDoubleFlag = kNumberType | kNumberFlag | kDoubleFlag,
+ kNumberAnyFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag | kUintFlag | kUint64Flag | kDoubleFlag,
+ kConstStringFlag = kStringType | kStringFlag, // set by SetStringRaw(StringRefType): string is referenced, not copied
+ kCopyStringFlag = kStringType | kStringFlag | kCopyFlag, // set by SetStringRaw(s, allocator) for strings copied into allocated memory
+ kShortStringFlag = kStringType | kStringFlag | kCopyFlag | kInlineStrFlag, // set by SetStringRaw(s, allocator) for strings that fit inline
+ kObjectFlag = kObjectType,
+ kArrayFlag = kArrayType,
+
+ kTypeMask = 0x07 // low three bits, i.e. below every flag bit above; presumably extracts the Type (use is outside this view)
+ };
+
+ static const SizeType kDefaultArrayCapacity = 16; // capacity PushBack() reserves when the array's capacity is still 0
+ static const SizeType kDefaultObjectCapacity = 16; // presumably the object counterpart; its use is outside this view
+
+ struct Flag { // view of Data that exposes the flags word placed after the payload bytes
+#if RAPIDJSON_48BITPOINTER_OPTIMIZATION
+ char payload[sizeof(SizeType) * 2 + 6]; // 2 x SizeType + lower 48-bit pointer
+#elif RAPIDJSON_64BIT
+ char payload[sizeof(SizeType) * 2 + sizeof(void*) + 6]; // 6 padding bytes
+#else
+ char payload[sizeof(SizeType) * 2 + sizeof(void*) + 2]; // 2 padding bytes
+#endif
+ uint16_t flags; // combination of the k*Flag bits; read throughout as data_.f.flags
+ };
+
+ struct String { // out-of-line string: length plus pointer (used when kInlineStrFlag is not set)
+ SizeType length; // returned by GetStringLength() for non-inline strings
+ SizeType hashcode; //!< reserved
+ const Ch* str; // accessed through GetStringPointer()/SetStringPointer()
+ }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
+
+ // implementation detail: ShortString can represent zero-terminated strings up to MaxSize chars
+ // (excluding the terminating zero) and store a value to determine the length of the contained
+ // string in the last character str[LenPos] by storing "MaxSize - length" there. If the string
+ // to store has the maximal length of MaxSize then str[LenPos] will be 0 and therefore act as
+ // the string terminator as well. For getting the string length back from that value just use
+ // "MaxSize - str[LenPos]".
+ // This allows to store 13-chars strings in 32-bit mode, 21-chars strings in 64-bit mode,
+ // 13-chars strings for RAPIDJSON_48BITPOINTER_OPTIMIZATION=1 inline (for `UTF8`-encoded strings).
+ struct ShortString {
+     // MaxChars: how many Ch fit into Flag::payload; MaxSize: longest storable string (one slot is
+     // kept for the terminator / length byte); LenPos: index of that last slot.
+     enum { MaxChars = sizeof(static_cast<Flag*>(0)->payload) / sizeof(Ch), MaxSize = MaxChars - 1, LenPos = MaxSize };
+     Ch str[MaxChars];
+
+     //! Whether a string of \c len characters fits inline.
+     inline static bool Usable(SizeType len) {
+         return (len <= MaxSize);
+     }
+     //! Record the length as "MaxSize - len" in the last slot (0, i.e. the terminator, at full length).
+     inline void SetLength(SizeType len) {
+         str[LenPos] = static_cast<Ch>(MaxSize - len);
+     }
+     //! Recover the length from the value stored in the last slot.
+     inline SizeType GetLength() const {
+         const Ch stored = str[LenPos];
+         return static_cast<SizeType>(MaxSize - stored);
+     }
+ }; // occupies MaxChars * sizeof(Ch) bytes, i.e. no more than Flag::payload, so it never overlaps Flag::flags
+
+ // By using proper binary layout, retrieval of different integer types do not need conversions.
+ union Number { // all numeric representations share the same 8 bytes
+#if RAPIDJSON_ENDIAN == RAPIDJSON_LITTLEENDIAN // little endian: the 32-bit member comes first, padding after
+ struct I {
+ int i; // read by GetInt()
+ char padding[4];
+ }i;
+ struct U {
+ unsigned u; // read by GetUint()
+ char padding2[4];
+ }u;
+#else // other byte orders: padding first, the 32-bit member after
+ struct I {
+ char padding[4];
+ int i;
+ }i;
+ struct U {
+ char padding2[4];
+ unsigned u;
+ }u;
+#endif
+ int64_t i64; // read by GetInt64()
+ uint64_t u64; // read by GetUint64()
+ double d; // read by GetDouble() when kDoubleFlag is set
+ }; // 8 bytes
+
+ struct ObjectData { // payload of an object value
+ SizeType size; // number of members; passed to handler.EndObject() by Accept()
+ SizeType capacity; // SetObjectRaw() sets it equal to size
+ Member* members; // accessed through GetMembersPointer()/SetMembersPointer()
+ }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
+
+ struct ArrayData { // payload of an array value
+ SizeType size; // number of elements currently stored
+ SizeType capacity; // number of elements the buffer can hold; grown by Reserve()
+ GenericValue* elements; // accessed through GetElementsPointer()/SetElementsPointer()
+ }; // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
+
+ union Data { // storage of a GenericValue: one payload view plus the flags word reachable through f
+ String s; // referenced or allocated string
+ ShortString ss; // inline string (kInlineStrFlag)
+ Number n; // numeric payload
+ ObjectData o; // object payload
+ ArrayData a; // array payload
+ Flag f; // gives access to the flags regardless of which payload is active
+ }; // 16 bytes in 32-bit mode, 24 bytes in 64-bit mode, 16 bytes in 64-bit with RAPIDJSON_48BITPOINTER_OPTIMIZATION
+
+ RAPIDJSON_FORCEINLINE const Ch* GetStringPointer() const { return RAPIDJSON_GETPOINTER(Ch, data_.s.str); } // all pointer reads/writes go through the RAPIDJSON_GETPOINTER/SETPOINTER macros (defined outside this view)
+ RAPIDJSON_FORCEINLINE const Ch* SetStringPointer(const Ch* str) { return RAPIDJSON_SETPOINTER(Ch, data_.s.str, str); } // stores the string pointer
+ RAPIDJSON_FORCEINLINE GenericValue* GetElementsPointer() const { return RAPIDJSON_GETPOINTER(GenericValue, data_.a.elements); } // array element buffer
+ RAPIDJSON_FORCEINLINE GenericValue* SetElementsPointer(GenericValue* elements) { return RAPIDJSON_SETPOINTER(GenericValue, data_.a.elements, elements); } // stores the array element buffer
+ RAPIDJSON_FORCEINLINE Member* GetMembersPointer() const { return RAPIDJSON_GETPOINTER(Member, data_.o.members); } // object member buffer
+ RAPIDJSON_FORCEINLINE Member* SetMembersPointer(Member* members) { return RAPIDJSON_SETPOINTER(Member, data_.o.members, members); } // stores the object member buffer
+
+ //! Initialize this value as array with initial data, without calling destructor.
+ /*! \param values Source elements; their bytes are copied, no constructors or destructors run.
+     \param count Number of elements; 0 leaves the array with a null element pointer.
+     \param allocator Allocator providing the element buffer.
+     \post size == capacity == count
+ */
+ void SetArrayRaw(GenericValue* values, SizeType count, Allocator& allocator) {
+     data_.f.flags = kArrayFlag;
+     if (count) {
+         GenericValue* e = static_cast<GenericValue*>(allocator.Malloc(count * sizeof(GenericValue)));
+         SetElementsPointer(e);
+         // Deliberate raw byte copy of a non-trivial type; the void* cast makes that explicit
+         // and avoids GCC's -Wclass-memaccess diagnostic.
+         std::memcpy(static_cast<void*>(e), values, count * sizeof(GenericValue));
+     }
+     else
+         SetElementsPointer(0);
+     data_.a.size = data_.a.capacity = count;
+ }
+
+ //! Initialize this value as object with initial data, without calling destructor.
+ /*! \param members Source members; their bytes are copied, no constructors or destructors run.
+     \param count Number of members; 0 leaves the object with a null member pointer.
+     \param allocator Allocator providing the member buffer.
+     \post size == capacity == count
+ */
+ void SetObjectRaw(Member* members, SizeType count, Allocator& allocator) {
+     data_.f.flags = kObjectFlag;
+     if (count) {
+         Member* m = static_cast<Member*>(allocator.Malloc(count * sizeof(Member)));
+         SetMembersPointer(m);
+         // Deliberate raw byte copy of a non-trivial type; the void* cast makes that explicit
+         // and avoids GCC's -Wclass-memaccess diagnostic.
+         std::memcpy(static_cast<void*>(m), members, count * sizeof(Member));
+     }
+     else
+         SetMembersPointer(0);
+     data_.o.size = data_.o.capacity = count;
+ }
+
+ //! Initialize this value as a constant (referenced, not copied) string, without calling destructor.
+ void SetStringRaw(StringRefType s) RAPIDJSON_NOEXCEPT {
+     const SizeType referencedLength = s.length;
+     data_.f.flags = kConstStringFlag;
+     SetStringPointer(s); // only the pointer is kept; the characters stay with the caller
+     data_.s.length = referencedLength;
+ }
+
+ //! Initialize this value as a copied string, without calling destructor.
+ /*! Strings that fit are stored in the inline buffer; longer ones are copied into
+     memory obtained from \c allocator. The copy is always zero-terminated.
+ */
+ void SetStringRaw(StringRefType s, Allocator& allocator) {
+     const SizeType charCount = s.length;
+     Ch* dest = 0;
+     if (!ShortString::Usable(charCount)) {
+         // Too long for the inline buffer: allocate room for the characters plus terminator.
+         data_.f.flags = kCopyStringFlag;
+         data_.s.length = charCount;
+         dest = static_cast<Ch *>(allocator.Malloc((charCount + 1) * sizeof(Ch)));
+         SetStringPointer(dest);
+     } else {
+         data_.f.flags = kShortStringFlag;
+         data_.ss.SetLength(charCount);
+         dest = data_.ss.str;
+     }
+     std::memcpy(dest, s, charCount * sizeof(Ch));
+     dest[charCount] = '\0';
+ }
+
+ //! Assignment without calling destructor
+ void RawAssign(GenericValue& rhs) RAPIDJSON_NOEXCEPT {
+ data_ = rhs.data_;
+ // data_ includes the flags word, so only the source needs updating: it is flagged as Null.
+ rhs.data_.f.flags = kNullFlag;
+ }
+
+ //! Compare two string values by length and content; both must be strings.
+ template <typename SourceAllocator>
+ bool StringEqual(const GenericValue<Encoding, SourceAllocator>& rhs) const {
+     RAPIDJSON_ASSERT(IsString());
+     RAPIDJSON_ASSERT(rhs.IsString());
+
+     const SizeType lhsLength = GetStringLength();
+     if (lhsLength != rhs.GetStringLength())
+         return false;
+
+     const Ch* const lhsChars = GetString();
+     const Ch* const rhsChars = rhs.GetString();
+     // Identical pointers (e.g. the same constant string) need no byte comparison.
+     return lhsChars == rhsChars || std::memcmp(lhsChars, rhsChars, sizeof(Ch) * lhsLength) == 0;
+ }
+
+ Data data_;
+};
+
+//! GenericValue with UTF8 encoding
+typedef GenericValue<UTF8<> > Value;
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericDocument
+
+//! A document for parsing JSON text as DOM.
+/*!
+ \note implements Handler concept
+ \tparam Encoding Encoding for both parsing and string storage.
+ \tparam Allocator Allocator for allocating memory for the DOM
+ \tparam StackAllocator Allocator for allocating memory for stack during parsing.
+ \warning Although GenericDocument inherits from GenericValue, the API does \b not provide any virtual functions, especially no virtual destructor. To avoid memory leaks, do not \c delete a GenericDocument object via a pointer to a GenericValue.
+*/
+template <typename Encoding, typename Allocator = MemoryPoolAllocator<>, typename StackAllocator = CrtAllocator>
+class GenericDocument : public GenericValue<Encoding, Allocator> {
+public:
+ typedef typename Encoding::Ch Ch; //!< Character type derived from Encoding.
+ typedef GenericValue<Encoding, Allocator> ValueType; //!< Value type of the document.
+ typedef Allocator AllocatorType; //!< Allocator type from template parameter.
+
+ //! Constructor
+ /*! Creates an empty document of specified type.
+ \param type Mandatory type of object to create.
+ \param allocator Optional allocator for allocating memory.
+ \param stackCapacity Optional initial capacity of stack in bytes.
+ \param stackAllocator Optional allocator for allocating memory for stack.
+ */
+ explicit GenericDocument(Type type, Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) :
+     GenericValue<Encoding, Allocator>(type), allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_()
+ {
+     // No allocator supplied: create one and remember that this document owns it.
+     if (allocator_ == 0) {
+         allocator_ = RAPIDJSON_NEW(Allocator)();
+         ownAllocator_ = allocator_;
+     }
+ }
+
+ //! Constructor
+ /*! Creates an empty document which type is Null.
+ \param allocator Optional allocator for allocating memory.
+ \param stackCapacity Optional initial capacity of stack in bytes.
+ \param stackAllocator Optional allocator for allocating memory for stack.
+ */
+ GenericDocument(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) :
+     allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_()
+ {
+     // No allocator supplied: create one and remember that this document owns it.
+     if (allocator_ == 0) {
+         allocator_ = RAPIDJSON_NEW(Allocator)();
+         ownAllocator_ = allocator_;
+     }
+ }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+ //! Move constructor in C++11
+ /*! Takes over the DOM root, both allocator pointers, the stack and the last parse result
+     of \c rhs. Afterwards \c rhs holds no allocator and a default-constructed ParseResult.
+ */
+ GenericDocument(GenericDocument&& rhs) RAPIDJSON_NOEXCEPT
+     : ValueType(std::forward<ValueType>(rhs)), // explicit cast to avoid prohibited move from Document
+       allocator_(rhs.allocator_),
+       ownAllocator_(rhs.ownAllocator_),
+       stack_(std::move(rhs.stack_)),
+       parseResult_(rhs.parseResult_)
+ {
+     // Detach the source so that it no longer refers to (or deletes) the transferred allocator.
+     rhs.parseResult_ = ParseResult();
+     rhs.ownAllocator_ = 0;
+     rhs.allocator_ = 0;
+ }
+#endif
+
+ //! Destructor
+ /*! Releases the DOM before the owned allocator is deleted. ~ValueType() runs after this
+     body, so without the reset it would tear down the root value after Destroy() had
+     already deleted the allocator that provided its memory.
+ */
+ ~GenericDocument() {
+     // Only needed when this document owns the allocator; a caller-supplied allocator outlives this call.
+     if (ownAllocator_)
+         ValueType::SetNull();
+     Destroy();
+ }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+ //! Move assignment in C++11
+ /*! Replaces this document's DOM, allocator pointers, stack and parse result with those of
+     \c rhs, which is left without an allocator and with a default-constructed ParseResult.
+     Self-assignment is a no-op.
+ */
+ GenericDocument& operator=(GenericDocument&& rhs) RAPIDJSON_NOEXCEPT
+ {
+     // Guard against self-move: the steps below would delete the owned allocator and then
+     // null out the very pointers that were just "taken over".
+     if (this == &rhs)
+         return *this;
+
+     // The cast to ValueType is necessary here, because otherwise it would
+     // attempt to call GenericValue's templated assignment operator.
+     ValueType::operator=(std::forward<ValueType>(rhs));
+
+     // Calling the destructor here would prematurely call stack_'s destructor
+     Destroy();
+
+     allocator_ = rhs.allocator_;
+     ownAllocator_ = rhs.ownAllocator_;
+     stack_ = std::move(rhs.stack_);
+     parseResult_ = rhs.parseResult_;
+
+     rhs.allocator_ = 0;
+     rhs.ownAllocator_ = 0;
+     rhs.parseResult_ = ParseResult();
+
+     return *this;
+ }
+#endif
+
+ //! Exchange the contents of this document with those of another.
+ /*!
+ \param rhs Another document.
+ \note Constant complexity.
+ \see GenericValue::Swap
+ */
+ GenericDocument& Swap(GenericDocument& rhs) RAPIDJSON_NOEXCEPT {
+     // Five independent exchanges: bookkeeping members first, then the stack and the DOM root.
+     internal::Swap(parseResult_, rhs.parseResult_);
+     internal::Swap(ownAllocator_, rhs.ownAllocator_);
+     internal::Swap(allocator_, rhs.allocator_);
+     stack_.Swap(rhs.stack_);
+     ValueType::Swap(rhs);
+     return *this;
+ }
+
+ //! free-standing swap function helper
+ /*!
+ Helper function to enable support for common swap implementation pattern based on \c std::swap:
+ \code
+ void swap(MyClass& a, MyClass& b) {
+ using std::swap;
+ swap(a.doc, b.doc);
+ // ...
+ }
+ \endcode
+ \see Swap()
+ */
+ friend inline void swap(GenericDocument& a, GenericDocument& b) RAPIDJSON_NOEXCEPT {
+     a.Swap(b); // found by ADL; delegates to the member Swap
+ }
+
+ //! Populate this document by a generator which produces SAX events.
+ /*! \tparam Generator A functor with <tt>bool f(Handler)</tt> prototype.
+ \param g Generator functor which sends SAX events to the parameter.
+ \return The document itself for fluent API.
+ */
+ template <typename Generator>
+ GenericDocument& Populate(Generator& g) {
+     ClearStackOnExit scope(*this); // the stack is cleared however this function is left
+     if (!g(*this))
+         return *this; // generator reported failure: keep the current root
+     RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object
+     ValueType* root = stack_.template Pop<ValueType>(1);
+     ValueType::operator=(*root); // Move value from stack to document
+     return *this;
+ }
+
+ //!@name Parse from stream
+ //!@{
+
+ //! Parse JSON text from an input stream (with Encoding conversion)
+ /*! \tparam parseFlags Combination of \ref ParseFlag.
+ \tparam SourceEncoding Encoding of input stream
+ \tparam InputStream Type of input stream, implementing Stream concept
+ \param is Input stream to be parsed.
+ \return The document itself for fluent API.
+ */
+ template <unsigned parseFlags, typename SourceEncoding, typename InputStream>
+ GenericDocument& ParseStream(InputStream& is) {
+     // Let the reader share the stack's allocator when the stack already has one.
+     StackAllocator* readerAllocator = stack_.HasAllocator() ? &stack_.GetAllocator() : 0;
+     GenericReader<SourceEncoding, Encoding, StackAllocator> reader(readerAllocator);
+     ClearStackOnExit scope(*this); // the stack is cleared however this function is left
+     parseResult_ = reader.template Parse<parseFlags>(is, *this);
+     if (parseResult_) {
+         RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object
+         ValueType* root = stack_.template Pop<ValueType>(1);
+         ValueType::operator=(*root); // Move value from stack to document
+     }
+     return *this;
+ }
+
+ //! Parse JSON text from an input stream
+ /*! \tparam parseFlags Combination of \ref ParseFlag.
+ \tparam InputStream Type of input stream, implementing Stream concept
+ \param is Input stream to be parsed.
+ \return The document itself for fluent API.
+ \note Forwards to the three-parameter overload, using the document's own Encoding as the source encoding.
+ */
+ template <unsigned parseFlags, typename InputStream>
+ GenericDocument& ParseStream(InputStream& is) {
+ return ParseStream<parseFlags, Encoding, InputStream>(is);
+ }
+
+ //! Parse JSON text from an input stream (with \ref kParseDefaultFlags)
+ /*! \tparam InputStream Type of input stream, implementing Stream concept
+ \param is Input stream to be parsed.
+ \return The document itself for fluent API.
+ \note Forwards to the three-parameter overload, using kParseDefaultFlags and the document's own Encoding as the source encoding.
+ */
+ template <typename InputStream>
+ GenericDocument& ParseStream(InputStream& is) {
+ return ParseStream<kParseDefaultFlags, Encoding, InputStream>(is);
+ }
+ //!@}
+
+ //!@name Parse in-place from mutable string
+ //!@{
+
+ //! Parse JSON text from a mutable string
+ /*! \tparam parseFlags Combination of \ref ParseFlag.
+ \param str Mutable zero-terminated string to be parsed.
+ \return The document itself for fluent API.
+ */
+ template <unsigned parseFlags>
+ GenericDocument& ParseInsitu(Ch* str) {
+     // Wrap the caller's buffer in an in-situ stream and force the in-situ flag on.
+     GenericInsituStringStream<Encoding> insituStream(str);
+     return ParseStream<parseFlags | kParseInsituFlag>(insituStream);
+ }
+
+ //! Parse JSON text from a mutable string (with \ref kParseDefaultFlags)
+ /*! \param str Mutable zero-terminated string to be parsed.
+ \return The document itself for fluent API.
+ \note Forwards to the templated overload, which adds \ref kParseInsituFlag to the flags.
+ */
+ GenericDocument& ParseInsitu(Ch* str) {
+ return ParseInsitu<kParseDefaultFlags>(str);
+ }
+ //!@}
+
+ //!@name Parse from read-only string
+ //!@{
+
+ //! Parse JSON text from a read-only string (with Encoding conversion)
+ /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag).
+ \tparam SourceEncoding Transcoding from input Encoding
+ \param str Read-only zero-terminated string to be parsed.
+ */
+ template <unsigned parseFlags, typename SourceEncoding>
+ GenericDocument& Parse(const typename SourceEncoding::Ch* str) {
+     // In-situ parsing would need a mutable buffer, which a read-only string cannot offer.
+     RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));
+     GenericStringStream<SourceEncoding> textStream(str);
+     return ParseStream<parseFlags, SourceEncoding>(textStream);
+ }
+
+ //! Parse JSON text from a read-only string
+ /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag).
+ \param str Read-only zero-terminated string to be parsed.
+ \return The document itself for fluent API.
+ \note Forwards to the overload with an explicit source encoding, passing the document's own Encoding.
+ */
+ template <unsigned parseFlags>
+ GenericDocument& Parse(const Ch* str) {
+ return Parse<parseFlags, Encoding>(str);
+ }
+
+ //! Parse JSON text from a read-only string (with \ref kParseDefaultFlags)
+ /*! \param str Read-only zero-terminated string to be parsed.
+ \return The document itself for fluent API.
+ */
+ GenericDocument& Parse(const Ch* str) {
+ return Parse<kParseDefaultFlags>(str);
+ }
+
+ //! Parse JSON text from a read-only buffer of known length (with Encoding conversion)
+ /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag).
+     \tparam SourceEncoding Encoding of the input buffer.
+     \param str Read-only buffer to be parsed.
+     \param length Number of SourceEncoding::Ch characters in \c str.
+     \return The document itself for fluent API.
+ */
+ template <unsigned parseFlags, typename SourceEncoding>
+ GenericDocument& Parse(const typename SourceEncoding::Ch* str, size_t length) {
+     RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));
+     // MemoryStream counts bytes, so scale the character count by the character width.
+     const size_t byteCount = length * sizeof(typename SourceEncoding::Ch);
+     MemoryStream bytes(reinterpret_cast<const char*>(str), byteCount);
+     EncodedInputStream<SourceEncoding, MemoryStream> decoded(bytes);
+     return ParseStream<parseFlags, SourceEncoding>(decoded);
+ }
+
+ template <unsigned parseFlags> // length-delimited parse, input in the document's own Encoding
+ GenericDocument& Parse(const Ch* str, size_t length) {
+ return Parse<parseFlags, Encoding>(str, length);
+ }
+
+ GenericDocument& Parse(const Ch* str, size_t length) { // length-delimited parse with kParseDefaultFlags
+ return Parse<kParseDefaultFlags>(str, length);
+ }
+
+#if RAPIDJSON_HAS_STDSTRING
+ //! Parse JSON text from a std::basic_string (with Encoding conversion)
+ /*! \note Only c_str() is forwarded, so the zero-terminated overload is used and
+     parsing ends at the first NUL character rather than at str.size().
+ */
+ template <unsigned parseFlags, typename SourceEncoding>
+ GenericDocument& Parse(const std::basic_string<typename SourceEncoding::Ch>& str) {
+     // c_str() is constant complexity according to standard. Should be faster than Parse(const char*, size_t)
+     const typename SourceEncoding::Ch* text = str.c_str();
+     return Parse<parseFlags, SourceEncoding>(text);
+ }
+
+ //! Parse JSON text from a std::basic_string in the document's own Encoding
+ template <unsigned parseFlags>
+ GenericDocument& Parse(const std::basic_string<Ch>& str) {
+     const Ch* text = str.c_str();
+     return Parse<parseFlags, Encoding>(text);
+ }
+
+ //! Parse JSON text from a std::basic_string (with \ref kParseDefaultFlags)
+ GenericDocument& Parse(const std::basic_string<Ch>& str) {
+     return Parse<kParseDefaultFlags>(str);
+ }
+#endif // RAPIDJSON_HAS_STDSTRING
+
+ //!@}
+
+ //!@name Handling parse errors
+ //!@{
+
+ //! Whether a parse error has occured in the last parsing.
+ bool HasParseError() const { return parseResult_.IsError(); }
+
+ //! Get the \ref ParseErrorCode of last parsing.
+ ParseErrorCode GetParseError() const { return parseResult_.Code(); }
+
+ //! Get the position of last parsing error in input, 0 otherwise.
+ size_t GetErrorOffset() const { return parseResult_.Offset(); }
+
+ //! Implicit conversion to get the last parse result
+#ifndef __clang__ // -Wdocumentation
+ /*! \return \ref ParseResult of the last parse operation
+
+ \code
+ Document doc;
+ ParseResult ok = doc.Parse(json);
+ if (!ok)
+ printf( "JSON parse error: %s (%u)\n", GetParseError_En(ok.Code()), static_cast<unsigned>(ok.Offset()));
+ \endcode
+ */
+#endif
+ operator ParseResult() const { return parseResult_; }
+ //!@}
+
+ //! Get the allocator of this document.
+ Allocator& GetAllocator() {
+     // The pointer is reset to 0 when the document has been moved from.
+     Allocator* const current = allocator_;
+     RAPIDJSON_ASSERT(current);
+     return *current;
+ }
+
+ //! Get the capacity of stack in bytes.
+ size_t GetStackCapacity() const {
+     const size_t capacity = stack_.GetCapacity();
+     return capacity;
+ }
+
+private:
+ // clear stack on any exit from ParseStream, e.g. due to exception
+ struct ClearStackOnExit { // scope guard: calls ClearStack() on the document when it goes out of scope
+ explicit ClearStackOnExit(GenericDocument& d) : d_(d) {}
+ ~ClearStackOnExit() { d_.ClearStack(); }
+ private:
+ ClearStackOnExit(const ClearStackOnExit&); // declared but not defined here: copying is not intended
+ ClearStackOnExit& operator=(const ClearStackOnExit&); // declared but not defined here: assignment is not intended
+ GenericDocument& d_; // document whose stack is cleared
+ };
+
+ // callers of the following private Handler functions
+ // template <typename,typename,typename> friend class GenericReader; // for parsing
+ template <typename, typename> friend class GenericValue; // for deep copying
+
+public:
+ // Implementation of Handler
+ //! Handler event: push a Null value onto the stack.
+ bool Null() {
+     void* slot = stack_.template Push<ValueType>();
+     new (slot) ValueType();
+     return true;
+ }
+ //! Handler event: push a boolean value onto the stack.
+ bool Bool(bool b) {
+     void* slot = stack_.template Push<ValueType>();
+     new (slot) ValueType(b);
+     return true;
+ }
+ //! Handler event: push an \c int value onto the stack.
+ bool Int(int i) {
+     void* slot = stack_.template Push<ValueType>();
+     new (slot) ValueType(i);
+     return true;
+ }
+ //! Handler event: push an \c unsigned value onto the stack.
+ bool Uint(unsigned i) {
+     void* slot = stack_.template Push<ValueType>();
+     new (slot) ValueType(i);
+     return true;
+ }
+ //! Handler event: push an \c int64_t value onto the stack.
+ bool Int64(int64_t i) {
+     void* slot = stack_.template Push<ValueType>();
+     new (slot) ValueType(i);
+     return true;
+ }
+ //! Handler event: push a \c uint64_t value onto the stack.
+ bool Uint64(uint64_t i) {
+     void* slot = stack_.template Push<ValueType>();
+     new (slot) ValueType(i);
+     return true;
+ }
+ //! Handler event: push a \c double value onto the stack.
+ bool Double(double d) {
+     void* slot = stack_.template Push<ValueType>();
+     new (slot) ValueType(d);
+     return true;
+ }
+
+ //! Handler event: a number delivered as raw text is stored exactly like a string.
+ bool RawNumber(const Ch* str, SizeType length, bool copy) {
+     ValueType* slot = stack_.template Push<ValueType>();
+     if (!copy)
+         new (slot) ValueType(str, length); // reference the caller's buffer
+     else
+         new (slot) ValueType(str, length, GetAllocator()); // copy via the document allocator
+     return true;
+ }
+
+ //! Handler event: push a string value, copied or referenced depending on \c copy.
+ bool String(const Ch* str, SizeType length, bool copy) {
+     ValueType* slot = stack_.template Push<ValueType>();
+     if (!copy)
+         new (slot) ValueType(str, length); // reference the caller's buffer
+     else
+         new (slot) ValueType(str, length, GetAllocator()); // copy via the document allocator
+     return true;
+ }
+
+ //! Handler event: push an empty object that EndObject() fills in later.
+ bool StartObject() {
+     void* slot = stack_.template Push<ValueType>();
+     new (slot) ValueType(kObjectType);
+     return true;
+ }
+
+ //! Handler event: a member name is stored as an ordinary string value.
+ bool Key(const Ch* str, SizeType length, bool copy) {
+     return String(str, length, copy);
+ }
+
+ //! Handler event: pop \c memberCount members and attach them to the object now on top of the stack.
+ bool EndObject(SizeType memberCount) {
+     typedef typename ValueType::Member MemberType;
+     MemberType* members = stack_.template Pop<MemberType>(memberCount);
+     ValueType* object = stack_.template Top<ValueType>();
+     object->SetObjectRaw(members, memberCount, GetAllocator());
+     return true;
+ }
+
+ bool StartArray() { new (stack_.template Push<ValueType>()) ValueType(kArrayType); return true; }
+
+ bool EndArray(SizeType elementCount) { // Handler event: pops elementCount values and hands them to the value left on top of stack_
+ ValueType* elements = stack_.template Pop<ValueType>(elementCount); // the popped region is viewed as an array of ValueType
+ stack_.template Top<ValueType>()->SetArrayRaw(elements, elementCount, GetAllocator()); // the value now on top receives the elements
+ return true; // always reports success
+ }
+
+private:
+ //! Prohibit copying
+ GenericDocument(const GenericDocument&);
+ //! Prohibit assignment
+ GenericDocument& operator=(const GenericDocument&);
+
+ void ClearStack() { // Empties stack_ and then asks it to release spare capacity
+ if (Allocator::kNeedFree)
+ while (stack_.GetSize() > 0) // This assumes every element on the stack is a GenericValue (a Member is two GenericValue objects)
+ (stack_.template Pop<ValueType>(1))->~ValueType(); // pop one element at a time and run its destructor explicitly
+ else
+ stack_.Clear(); // kNeedFree is false: the elements are dropped without running destructors
+ stack_.ShrinkToFit(); // runs on both paths (it is not part of the else branch)
+ }
+
+ void Destroy() { // Releases the allocator held in ownAllocator_
+ RAPIDJSON_DELETE(ownAllocator_); // only ownAllocator_ is deleted here; allocator_ is left untouched
+ }
+
+ static const size_t kDefaultStackCapacity = 1024;
+ Allocator* allocator_;
+ Allocator* ownAllocator_;
+ internal::Stack<StackAllocator> stack_;
+ ParseResult parseResult_;
+};
+
+//! GenericDocument with UTF8 encoding
+typedef GenericDocument<UTF8<> > Document;
+
+//! Helper class for accessing Value of array type.
+/*!
+ Instance of this helper class is obtained by \c GenericValue::GetArray().
+ In addition to all APIs for array type, it provides range-based for loop if \c RAPIDJSON_HAS_CXX11_RANGE_FOR=1.
+*/
+template <bool Const, typename ValueT>
+class GenericArray { // Thin wrapper that stores a reference to a value (value_) and forwards every call to it
+public:
+ typedef GenericArray<true, ValueT> ConstArray;
+ typedef GenericArray<false, ValueT> Array;
+ typedef ValueT PlainType;
+ typedef typename internal::MaybeAddConst<Const,PlainType>::Type ValueType; // PlainType, const-qualified or not depending on Const
+ typedef ValueType* ValueIterator; // This may be a const or non-const iterator, following ValueType
+ typedef const ValueT* ConstValueIterator;
+ typedef typename ValueType::AllocatorType AllocatorType;
+ typedef typename ValueType::StringRefType StringRefType;
+
+ template <typename, typename>
+ friend class GenericValue; // GenericValue needs access to the private constructor below
+
+ GenericArray(const GenericArray& rhs) : value_(rhs.value_) {} // the copy refers to the same underlying value
+ GenericArray& operator=(const GenericArray& rhs) { value_ = rhs.value_; return *this; } // assigns through the reference to the wrapped value; the reference itself is not rebound
+ ~GenericArray() {}
+
+ SizeType Size() const { return value_.Size(); }
+ SizeType Capacity() const { return value_.Capacity(); }
+ bool Empty() const { return value_.Empty(); }
+ void Clear() const { value_.Clear(); } // const on the wrapper only: the mutation is applied to the referenced value
+ ValueType& operator[](SizeType index) const { return value_[index]; }
+ ValueIterator Begin() const { return value_.Begin(); }
+ ValueIterator End() const { return value_.End(); }
+ GenericArray Reserve(SizeType newCapacity, AllocatorType &allocator) const { value_.Reserve(newCapacity, allocator); return *this; } // returns a copy of this wrapper so calls can be chained
+ GenericArray PushBack(ValueType& value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; }
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+ GenericArray PushBack(ValueType&& value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; } // value is a named parameter here, so it is forwarded as an lvalue
+#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
+ GenericArray PushBack(StringRefType value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; }
+ template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (const GenericArray&)) PushBack(T value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; } // overload disabled for pointer and GenericValue types
+ GenericArray PopBack() const { value_.PopBack(); return *this; }
+ ValueIterator Erase(ConstValueIterator pos) const { return value_.Erase(pos); }
+ ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) const { return value_.Erase(first, last); }
+
+#if RAPIDJSON_HAS_CXX11_RANGE_FOR
+ ValueIterator begin() const { return value_.Begin(); } // lowercase names enable range-based for
+ ValueIterator end() const { return value_.End(); }
+#endif
+
+private:
+ GenericArray(); // declared but not defined here: default construction is not available
+ GenericArray(ValueType& value) : value_(value) {} // private: instances are created by the friend GenericValue
+ ValueType& value_; // the wrapped value; not owned by this helper
+};
+
+//! Helper class for accessing Value of object type.
+/*!
+ Instance of this helper class is obtained by \c GenericValue::GetObject().
+ In addition to all APIs for object type, it provides range-based for loop if \c RAPIDJSON_HAS_CXX11_RANGE_FOR=1.
+*/
+template <bool Const, typename ValueT>
+class GenericObject { // Thin wrapper that stores a reference to a value (value_) and forwards every call to it
+public:
+ typedef GenericObject<true, ValueT> ConstObject;
+ typedef GenericObject<false, ValueT> Object;
+ typedef ValueT PlainType;
+ typedef typename internal::MaybeAddConst<Const,PlainType>::Type ValueType; // PlainType, const-qualified or not depending on Const
+ typedef GenericMemberIterator<Const, typename ValueT::EncodingType, typename ValueT::AllocatorType> MemberIterator; // This may be a const or non-const iterator, following Const
+ typedef GenericMemberIterator<true, typename ValueT::EncodingType, typename ValueT::AllocatorType> ConstMemberIterator;
+ typedef typename ValueType::AllocatorType AllocatorType;
+ typedef typename ValueType::StringRefType StringRefType;
+ typedef typename ValueType::EncodingType EncodingType;
+ typedef typename ValueType::Ch Ch;
+
+ template <typename, typename>
+ friend class GenericValue; // GenericValue needs access to the private constructor below
+
+ GenericObject(const GenericObject& rhs) : value_(rhs.value_) {} // the copy refers to the same underlying value
+ GenericObject& operator=(const GenericObject& rhs) { value_ = rhs.value_; return *this; } // assigns through the reference to the wrapped value; the reference itself is not rebound
+ ~GenericObject() {}
+
+ SizeType MemberCount() const { return value_.MemberCount(); }
+ bool ObjectEmpty() const { return value_.ObjectEmpty(); }
+ template <typename T> ValueType& operator[](T* name) const { return value_[name]; }
+ template <typename SourceAllocator> ValueType& operator[](const GenericValue<EncodingType, SourceAllocator>& name) const { return value_[name]; }
+#if RAPIDJSON_HAS_STDSTRING
+ ValueType& operator[](const std::basic_string<Ch>& name) const { return value_[name]; }
+#endif
+ MemberIterator MemberBegin() const { return value_.MemberBegin(); }
+ MemberIterator MemberEnd() const { return value_.MemberEnd(); }
+ bool HasMember(const Ch* name) const { return value_.HasMember(name); }
+#if RAPIDJSON_HAS_STDSTRING
+ bool HasMember(const std::basic_string<Ch>& name) const { return value_.HasMember(name); }
+#endif
+ template <typename SourceAllocator> bool HasMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.HasMember(name); }
+ MemberIterator FindMember(const Ch* name) const { return value_.FindMember(name); }
+ template <typename SourceAllocator> MemberIterator FindMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.FindMember(name); }
+#if RAPIDJSON_HAS_STDSTRING
+ MemberIterator FindMember(const std::basic_string<Ch>& name) const { return value_.FindMember(name); }
+#endif
+ GenericObject AddMember(ValueType& name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } // returns a copy of this wrapper so calls can be chained
+ GenericObject AddMember(ValueType& name, StringRefType value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+#if RAPIDJSON_HAS_STDSTRING
+ GenericObject AddMember(ValueType& name, std::basic_string<Ch>& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+#endif
+ template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericObject)) AddMember(ValueType& name, T value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } // returns GenericObject like its siblings: *this is not convertible to ValueType&
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+ GenericObject AddMember(ValueType&& name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } // name/value are named parameters here, so they are forwarded as lvalues
+ GenericObject AddMember(ValueType&& name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+ GenericObject AddMember(ValueType& name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+ GenericObject AddMember(StringRefType name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
+ GenericObject AddMember(StringRefType name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+ GenericObject AddMember(StringRefType name, StringRefType value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+ template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericObject)) AddMember(StringRefType name, T value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; } // overload disabled for pointer and GenericValue types
+ void RemoveAllMembers() const { value_.RemoveAllMembers(); } // const like the other mutators: the mutation is applied to the referenced value
+ bool RemoveMember(const Ch* name) const { return value_.RemoveMember(name); }
+#if RAPIDJSON_HAS_STDSTRING
+ bool RemoveMember(const std::basic_string<Ch>& name) const { return value_.RemoveMember(name); }
+#endif
+ template <typename SourceAllocator> bool RemoveMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.RemoveMember(name); }
+ MemberIterator RemoveMember(MemberIterator m) const { return value_.RemoveMember(m); }
+ MemberIterator EraseMember(ConstMemberIterator pos) const { return value_.EraseMember(pos); }
+ MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last) const { return value_.EraseMember(first, last); }
+ bool EraseMember(const Ch* name) const { return value_.EraseMember(name); }
+#if RAPIDJSON_HAS_STDSTRING
+ bool EraseMember(const std::basic_string<Ch>& name) const { return EraseMember(ValueType(StringRef(name))); } // wraps the name in a temporary value and reuses the GenericValue overload below
+#endif
+ template <typename SourceAllocator> bool EraseMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.EraseMember(name); }
+
+#if RAPIDJSON_HAS_CXX11_RANGE_FOR
+ MemberIterator begin() const { return value_.MemberBegin(); } // lowercase names enable range-based for
+ MemberIterator end() const { return value_.MemberEnd(); }
+#endif
+
+private:
+ GenericObject(); // declared but not defined here: default construction is not available
+ GenericObject(ValueType& value) : value_(value) {} // private: instances are created by the friend GenericValue
+ ValueType& value_; // the wrapped value; not owned by this helper
+};
+
+RAPIDJSON_NAMESPACE_END
+#ifdef _MINWINDEF_ // see: http://stackoverflow.com/questions/22744262/cant-call-stdmax-because-minwindef-h-defines-max
+#ifndef NOMINMAX
+#pragma pop_macro("min")
+#pragma pop_macro("max")
+#endif
+#endif
+RAPIDJSON_DIAG_POP
+
+#endif // RAPIDJSON_DOCUMENT_H_
diff --git a/contrib/libs/rapidjson/include/rapidjson/internal/dtoa.h b/contrib/libs/rapidjson/include/rapidjson/internal/dtoa.h
new file mode 100644
index 0000000000..bf2e9b2e59
--- /dev/null
+++ b/contrib/libs/rapidjson/include/rapidjson/internal/dtoa.h
@@ -0,0 +1,245 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// This is a C++ header-only implementation of Grisu2 algorithm from the publication:
+// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
+// integers." ACM Sigplan Notices 45.6 (2010): 233-243.
+
+#ifndef RAPIDJSON_DTOA_
+#define RAPIDJSON_DTOA_
+
+#include "itoa.h" // GetDigitsLut()
+#include "diyfp.h"
+#include "ieee754.h"
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(effc++)
+RAPIDJSON_DIAG_OFF(array-bounds) // some gcc versions generate wrong warnings https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
+#endif
+
+inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w) { // Lowers the last digit in buffer while that moves rest closer to wp_w without exceeding delta
+ while (rest < wp_w && delta - rest >= ten_kappa && // still below wp_w, and one more step of ten_kappa fits within delta
+ (rest + ten_kappa < wp_w || // the step stays below wp_w, or ...
+ wp_w - rest > rest + ten_kappa - wp_w)) { // ... it overshoots by less than the current shortfall
+ buffer[len - 1]--; // decrement the last digit character in place
+ rest += ten_kappa;
+ }
+}
+
+inline int CountDecimalDigit32(uint32_t n) { // Returns the decimal digit count of n, capped at 9 (the result is always in 1..9)
+ // Simple pure C++ implementation was faster than __builtin_clz version in this situation.
+ if (n < 10) return 1;
+ if (n < 100) return 2;
+ if (n < 1000) return 3;
+ if (n < 10000) return 4;
+ if (n < 100000) return 5;
+ if (n < 1000000) return 6;
+ if (n < 10000000) return 7;
+ if (n < 100000000) return 8;
+ // The caller DigitGen() never needs 10 digits, so the last two checks are left disabled:
+ //if (n < 1000000000) return 9;
+ //return 10;
+ return 9; // every n >= 100000000 reports 9, including values that actually have 10 digits
+}
+
+inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) { // Emits decimal digits of Mp into buffer, stopping once the remainder is within delta; updates *len and *K
+ static const uint32_t kPow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };
+ const DiyFp one(uint64_t(1) << -Mp.e, Mp.e); // the value 1 expressed with Mp's exponent (the shift uses -Mp.e)
+ const DiyFp wp_w = Mp - W;
+ uint32_t p1 = static_cast<uint32_t>(Mp.f >> -one.e); // bits of Mp.f above the position of one.f
+ uint64_t p2 = Mp.f & (one.f - 1); // bits of Mp.f below the position of one.f
+ int kappa = CountDecimalDigit32(p1); // kappa in [1, 9]
+ *len = 0;
+
+ while (kappa > 0) { // first phase: peel decimal digits off p1, most significant first
+ uint32_t d = 0;
+ switch (kappa) {
+ case 9: d = p1 / 100000000; p1 %= 100000000; break;
+ case 8: d = p1 / 10000000; p1 %= 10000000; break;
+ case 7: d = p1 / 1000000; p1 %= 1000000; break;
+ case 6: d = p1 / 100000; p1 %= 100000; break;
+ case 5: d = p1 / 10000; p1 %= 10000; break;
+ case 4: d = p1 / 1000; p1 %= 1000; break;
+ case 3: d = p1 / 100; p1 %= 100; break;
+ case 2: d = p1 / 10; p1 %= 10; break;
+ case 1: d = p1; p1 = 0; break;
+ default:;
+ }
+ if (d || *len) // skip leading zeros: a zero digit is stored only after some digit has been written
+ buffer[(*len)++] = static_cast<char>('0' + static_cast<char>(d));
+ kappa--;
+ uint64_t tmp = (static_cast<uint64_t>(p1) << -one.e) + p2; // what remains of Mp.f after the digits taken so far
+ if (tmp <= delta) { // remainder is within delta: stop and round
+ *K += kappa;
+ GrisuRound(buffer, *len, delta, tmp, static_cast<uint64_t>(kPow10[kappa]) << -one.e, wp_w.f);
+ return;
+ }
+ }
+
+ // kappa = 0
+ for (;;) { // second phase: produce digits from p2 by repeated multiplication by 10
+ p2 *= 10;
+ delta *= 10; // delta is scaled together with p2 so the comparison below stays consistent
+ char d = static_cast<char>(p2 >> -one.e);
+ if (d || *len) // same leading-zero suppression as in the first phase
+ buffer[(*len)++] = static_cast<char>('0' + d);
+ p2 &= one.f - 1;
+ kappa--; // kappa goes negative here; -kappa counts the digits produced in this phase
+ if (p2 < delta) {
+ *K += kappa;
+ int index = -kappa;
+ GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * (index < 9 ? kPow10[index] : 0)); // the multiplier becomes 0 once index reaches 9
+ return;
+ }
+ }
+}
+
+inline void Grisu2(double value, char* buffer, int* length, int* K) { // Produces the digit string of value in buffer, its digit count in *length and the decimal exponent in *K
+ const DiyFp v(value);
+ DiyFp w_m, w_p;
+ v.NormalizedBoundaries(&w_m, &w_p); // w_m / w_p receive the lower / upper boundary of v
+
+ const DiyFp c_mk = GetCachedPower(w_p.e, K); // cached power selected from w_p's exponent; also writes *K
+ const DiyFp W = v.Normalize() * c_mk;
+ DiyFp Wp = w_p * c_mk;
+ DiyFp Wm = w_m * c_mk;
+ Wm.f++; // move both scaled boundaries one unit inward ...
+ Wp.f--; // ... which narrows the interval passed to DigitGen
+ DigitGen(W, Wp, Wp.f - Wm.f, buffer, length, K);
+}
+
+inline char* WriteExponent(int K, char* buffer) { // Writes K as an optional '-' followed by 1 to 3 decimal digits; returns the position after the last character
+ if (K < 0) {
+ *buffer++ = '-';
+ K = -K; // continue with the magnitude
+ }
+
+ if (K >= 100) {
+ *buffer++ = static_cast<char>('0' + static_cast<char>(K / 100)); // hundreds digit; NOTE(review): a single digit assumes K < 1000 - confirm with callers
+ K %= 100;
+ const char* d = GetDigitsLut() + K * 2; // two-character table entry for the remaining 0..99
+ *buffer++ = d[0];
+ *buffer++ = d[1];
+ }
+ else if (K >= 10) {
+ const char* d = GetDigitsLut() + K * 2; // two-character table entry for 10..99
+ *buffer++ = d[0];
+ *buffer++ = d[1];
+ }
+ else
+ *buffer++ = static_cast<char>('0' + static_cast<char>(K)); // single digit 0..9
+
+ return buffer;
+}
+
+inline char* Prettify(char* buffer, int length, int k, int maxDecimalPlaces) { // Rewrites the length digits in buffer (scaled by 10^k) in place as plain or exponent notation; returns the end of the text
+ const int kk = length + k; // 10^(kk-1) <= v < 10^kk
+
+ if (0 <= k && kk <= 21) {
+ // 1234e7 -> 12340000000
+ for (int i = length; i < kk; i++)
+ buffer[i] = '0'; // pad with zeros up to the decimal point
+ buffer[kk] = '.';
+ buffer[kk + 1] = '0'; // integers are written with a trailing ".0"
+ return &buffer[kk + 2];
+ }
+ else if (0 < kk && kk <= 21) {
+ // 1234e-2 -> 12.34
+ std::memmove(&buffer[kk + 1], &buffer[kk], static_cast<size_t>(length - kk)); // shift the fractional digits right by one to make room for '.'
+ buffer[kk] = '.';
+ if (0 > k + maxDecimalPlaces) {
+ // When maxDecimalPlaces = 2, 1.2345 -> 1.23, 1.102 -> 1.1
+ // Remove extra trailing zeros (at least one) after truncation.
+ for (int i = kk + maxDecimalPlaces; i > kk + 1; i--)
+ if (buffer[i] != '0')
+ return &buffer[i + 1];
+ return &buffer[kk + 2]; // Reserve one zero
+ }
+ else
+ return &buffer[length + 1]; // all digits kept, plus the inserted '.'
+ }
+ else if (-6 < kk && kk <= 0) {
+ // 1234e-6 -> 0.001234
+ const int offset = 2 - kk; // room for "0." plus -kk leading zeros
+ std::memmove(&buffer[offset], &buffer[0], static_cast<size_t>(length));
+ buffer[0] = '0';
+ buffer[1] = '.';
+ for (int i = 2; i < offset; i++)
+ buffer[i] = '0';
+ if (length - kk > maxDecimalPlaces) {
+ // When maxDecimalPlaces = 2, 0.123 -> 0.12, 0.102 -> 0.1
+ // Remove extra trailing zeros (at least one) after truncation.
+ for (int i = maxDecimalPlaces + 1; i > 2; i--)
+ if (buffer[i] != '0')
+ return &buffer[i + 1];
+ return &buffer[3]; // Reserve one zero
+ }
+ else
+ return &buffer[length + offset];
+ }
+ else if (kk < -maxDecimalPlaces) {
+ // Truncate to zero
+ buffer[0] = '0';
+ buffer[1] = '.';
+ buffer[2] = '0';
+ return &buffer[3];
+ }
+ else if (length == 1) {
+ // 1e30
+ buffer[1] = 'e';
+ return WriteExponent(kk - 1, &buffer[2]);
+ }
+ else {
+ // 1234e30 -> 1.234e33
+ std::memmove(&buffer[2], &buffer[1], static_cast<size_t>(length - 1)); // shift all but the first digit right to make room for '.'
+ buffer[1] = '.';
+ buffer[length + 1] = 'e';
+ return WriteExponent(kk - 1, &buffer[0 + length + 2]);
+ }
+}
+
+inline char* dtoa(double value, char* buffer, int maxDecimalPlaces = 324) { // Writes value as text into buffer and returns the position after the last character; no terminator is written here
+ RAPIDJSON_ASSERT(maxDecimalPlaces >= 1);
+ Double d(value);
+ if (d.IsZero()) { // zero is written directly as "0.0" (or "-0.0"), bypassing Grisu2
+ if (d.Sign())
+ *buffer++ = '-'; // -0.0, Issue #289
+ buffer[0] = '0';
+ buffer[1] = '.';
+ buffer[2] = '0';
+ return &buffer[3];
+ }
+ else {
+ if (value < 0) {
+ *buffer++ = '-'; // emit the sign, then continue with the magnitude
+ value = -value;
+ }
+ int length, K; // both are filled in by Grisu2
+ Grisu2(value, buffer, &length, &K);
+ return Prettify(buffer, length, K, maxDecimalPlaces);
+ }
+}
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_POP
+#endif
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_DTOA_
diff --git a/contrib/libs/rapidjson/include/rapidjson/internal/itoa.h b/contrib/libs/rapidjson/include/rapidjson/internal/itoa.h
new file mode 100644
index 0000000000..01a4e7e72d
--- /dev/null
+++ b/contrib/libs/rapidjson/include/rapidjson/internal/itoa.h
@@ -0,0 +1,304 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_ITOA_
+#define RAPIDJSON_ITOA_
+
+#include "../rapidjson.h"
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+inline const char* GetDigitsLut() { // Returns a 200-char table: entry i (0..99) holds its two decimal digits at offsets 2*i and 2*i+1
+ static const char cDigitsLut[200] = {
+ '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9',
+ '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9',
+ '2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9',
+ '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9',
+ '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9',
+ '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9',
+ '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9',
+ '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9',
+ '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9',
+ '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9'
+ };
+ return cDigitsLut; // the table is a function-local static, so the pointer stays valid after return
+}
+
+inline char* u32toa(uint32_t value, char* buffer) { // Writes value in decimal without leading zeros; returns the position after the last digit (no terminator is written)
+ const char* cDigitsLut = GetDigitsLut();
+
+ if (value < 10000) { // 1 to 4 digits
+ const uint32_t d1 = (value / 100) << 1; // table offset of the upper two digits
+ const uint32_t d2 = (value % 100) << 1; // table offset of the lower two digits
+
+ if (value >= 1000)
+ *buffer++ = cDigitsLut[d1];
+ if (value >= 100)
+ *buffer++ = cDigitsLut[d1 + 1];
+ if (value >= 10)
+ *buffer++ = cDigitsLut[d2];
+ *buffer++ = cDigitsLut[d2 + 1]; // the last digit is always written
+ }
+ else if (value < 100000000) { // 5 to 8 digits
+ // value = bbbbcccc
+ const uint32_t b = value / 10000;
+ const uint32_t c = value % 10000;
+
+ const uint32_t d1 = (b / 100) << 1;
+ const uint32_t d2 = (b % 100) << 1;
+
+ const uint32_t d3 = (c / 100) << 1;
+ const uint32_t d4 = (c % 100) << 1;
+
+ if (value >= 10000000)
+ *buffer++ = cDigitsLut[d1];
+ if (value >= 1000000)
+ *buffer++ = cDigitsLut[d1 + 1];
+ if (value >= 100000)
+ *buffer++ = cDigitsLut[d2];
+ *buffer++ = cDigitsLut[d2 + 1]; // value >= 10000 in this branch, so this digit is always present
+
+ *buffer++ = cDigitsLut[d3]; // the low four digits are written unconditionally
+ *buffer++ = cDigitsLut[d3 + 1];
+ *buffer++ = cDigitsLut[d4];
+ *buffer++ = cDigitsLut[d4 + 1];
+ }
+ else { // 9 or 10 digits
+ // value = aabbbbcccc in decimal
+
+ const uint32_t a = value / 100000000; // 1 to 42
+ value %= 100000000;
+
+ if (a >= 10) {
+ const unsigned i = a << 1;
+ *buffer++ = cDigitsLut[i];
+ *buffer++ = cDigitsLut[i + 1];
+ }
+ else
+ *buffer++ = static_cast<char>('0' + static_cast<char>(a));
+
+ const uint32_t b = value / 10000; // 0 to 9999
+ const uint32_t c = value % 10000; // 0 to 9999
+
+ const uint32_t d1 = (b / 100) << 1;
+ const uint32_t d2 = (b % 100) << 1;
+
+ const uint32_t d3 = (c / 100) << 1;
+ const uint32_t d4 = (c % 100) << 1;
+
+ *buffer++ = cDigitsLut[d1]; // all eight remaining digits are written, zeros included
+ *buffer++ = cDigitsLut[d1 + 1];
+ *buffer++ = cDigitsLut[d2];
+ *buffer++ = cDigitsLut[d2 + 1];
+ *buffer++ = cDigitsLut[d3];
+ *buffer++ = cDigitsLut[d3 + 1];
+ *buffer++ = cDigitsLut[d4];
+ *buffer++ = cDigitsLut[d4 + 1];
+ }
+ return buffer;
+}
+
+inline char* i32toa(int32_t value, char* buffer) { // Signed wrapper around u32toa: writes '-' for negatives, then the magnitude
+ uint32_t u = static_cast<uint32_t>(value);
+ if (value < 0) {
+ *buffer++ = '-';
+ u = ~u + 1; // negate in unsigned arithmetic, so no signed negation of value is performed
+ }
+
+ return u32toa(u, buffer);
+}
+
+inline char* u64toa(uint64_t value, char* buffer) { // Writes value in decimal without leading zeros; returns the position after the last digit (no terminator is written)
+ const char* cDigitsLut = GetDigitsLut();
+ const uint64_t kTen8 = 100000000;
+ const uint64_t kTen9 = kTen8 * 10;
+ const uint64_t kTen10 = kTen8 * 100;
+ const uint64_t kTen11 = kTen8 * 1000;
+ const uint64_t kTen12 = kTen8 * 10000;
+ const uint64_t kTen13 = kTen8 * 100000;
+ const uint64_t kTen14 = kTen8 * 1000000;
+ const uint64_t kTen15 = kTen8 * 10000000;
+ const uint64_t kTen16 = kTen8 * kTen8;
+
+ if (value < kTen8) { // 1 to 8 digits: the value fits in 32 bits
+ uint32_t v = static_cast<uint32_t>(value);
+ if (v < 10000) {
+ const uint32_t d1 = (v / 100) << 1; // table offset of the upper two digits
+ const uint32_t d2 = (v % 100) << 1; // table offset of the lower two digits
+
+ if (v >= 1000)
+ *buffer++ = cDigitsLut[d1];
+ if (v >= 100)
+ *buffer++ = cDigitsLut[d1 + 1];
+ if (v >= 10)
+ *buffer++ = cDigitsLut[d2];
+ *buffer++ = cDigitsLut[d2 + 1]; // the last digit is always written
+ }
+ else {
+ // value = bbbbcccc
+ const uint32_t b = v / 10000;
+ const uint32_t c = v % 10000;
+
+ const uint32_t d1 = (b / 100) << 1;
+ const uint32_t d2 = (b % 100) << 1;
+
+ const uint32_t d3 = (c / 100) << 1;
+ const uint32_t d4 = (c % 100) << 1;
+
+ if (value >= 10000000)
+ *buffer++ = cDigitsLut[d1];
+ if (value >= 1000000)
+ *buffer++ = cDigitsLut[d1 + 1];
+ if (value >= 100000)
+ *buffer++ = cDigitsLut[d2];
+ *buffer++ = cDigitsLut[d2 + 1]; // v >= 10000 in this branch, so this digit is always present
+
+ *buffer++ = cDigitsLut[d3];
+ *buffer++ = cDigitsLut[d3 + 1];
+ *buffer++ = cDigitsLut[d4];
+ *buffer++ = cDigitsLut[d4 + 1];
+ }
+ }
+ else if (value < kTen16) { // 9 to 16 digits: split into two 8-digit halves v0 (high) and v1 (low)
+ const uint32_t v0 = static_cast<uint32_t>(value / kTen8);
+ const uint32_t v1 = static_cast<uint32_t>(value % kTen8);
+
+ const uint32_t b0 = v0 / 10000;
+ const uint32_t c0 = v0 % 10000;
+
+ const uint32_t d1 = (b0 / 100) << 1;
+ const uint32_t d2 = (b0 % 100) << 1;
+
+ const uint32_t d3 = (c0 / 100) << 1;
+ const uint32_t d4 = (c0 % 100) << 1;
+
+ const uint32_t b1 = v1 / 10000;
+ const uint32_t c1 = v1 % 10000;
+
+ const uint32_t d5 = (b1 / 100) << 1;
+ const uint32_t d6 = (b1 % 100) << 1;
+
+ const uint32_t d7 = (c1 / 100) << 1;
+ const uint32_t d8 = (c1 % 100) << 1;
+
+ if (value >= kTen15) // each high-half digit is written only if value is large enough to have it
+ *buffer++ = cDigitsLut[d1];
+ if (value >= kTen14)
+ *buffer++ = cDigitsLut[d1 + 1];
+ if (value >= kTen13)
+ *buffer++ = cDigitsLut[d2];
+ if (value >= kTen12)
+ *buffer++ = cDigitsLut[d2 + 1];
+ if (value >= kTen11)
+ *buffer++ = cDigitsLut[d3];
+ if (value >= kTen10)
+ *buffer++ = cDigitsLut[d3 + 1];
+ if (value >= kTen9)
+ *buffer++ = cDigitsLut[d4];
+ if (value >= kTen8) // always true here: the first branch already handled value < kTen8
+ *buffer++ = cDigitsLut[d4 + 1];
+
+ *buffer++ = cDigitsLut[d5]; // the low eight digits are written unconditionally
+ *buffer++ = cDigitsLut[d5 + 1];
+ *buffer++ = cDigitsLut[d6];
+ *buffer++ = cDigitsLut[d6 + 1];
+ *buffer++ = cDigitsLut[d7];
+ *buffer++ = cDigitsLut[d7 + 1];
+ *buffer++ = cDigitsLut[d8];
+ *buffer++ = cDigitsLut[d8 + 1];
+ }
+ else { // 17 to 20 digits: leading part a, followed by exactly 16 digits
+ const uint32_t a = static_cast<uint32_t>(value / kTen16); // 1 to 1844
+ value %= kTen16;
+
+ if (a < 10)
+ *buffer++ = static_cast<char>('0' + static_cast<char>(a));
+ else if (a < 100) {
+ const uint32_t i = a << 1;
+ *buffer++ = cDigitsLut[i];
+ *buffer++ = cDigitsLut[i + 1];
+ }
+ else if (a < 1000) {
+ *buffer++ = static_cast<char>('0' + static_cast<char>(a / 100));
+
+ const uint32_t i = (a % 100) << 1;
+ *buffer++ = cDigitsLut[i];
+ *buffer++ = cDigitsLut[i + 1];
+ }
+ else {
+ const uint32_t i = (a / 100) << 1;
+ const uint32_t j = (a % 100) << 1;
+ *buffer++ = cDigitsLut[i];
+ *buffer++ = cDigitsLut[i + 1];
+ *buffer++ = cDigitsLut[j];
+ *buffer++ = cDigitsLut[j + 1];
+ }
+
+ const uint32_t v0 = static_cast<uint32_t>(value / kTen8);
+ const uint32_t v1 = static_cast<uint32_t>(value % kTen8);
+
+ const uint32_t b0 = v0 / 10000;
+ const uint32_t c0 = v0 % 10000;
+
+ const uint32_t d1 = (b0 / 100) << 1;
+ const uint32_t d2 = (b0 % 100) << 1;
+
+ const uint32_t d3 = (c0 / 100) << 1;
+ const uint32_t d4 = (c0 % 100) << 1;
+
+ const uint32_t b1 = v1 / 10000;
+ const uint32_t c1 = v1 % 10000;
+
+ const uint32_t d5 = (b1 / 100) << 1;
+ const uint32_t d6 = (b1 % 100) << 1;
+
+ const uint32_t d7 = (c1 / 100) << 1;
+ const uint32_t d8 = (c1 % 100) << 1;
+
+ *buffer++ = cDigitsLut[d1]; // all sixteen remaining digits are written, zeros included
+ *buffer++ = cDigitsLut[d1 + 1];
+ *buffer++ = cDigitsLut[d2];
+ *buffer++ = cDigitsLut[d2 + 1];
+ *buffer++ = cDigitsLut[d3];
+ *buffer++ = cDigitsLut[d3 + 1];
+ *buffer++ = cDigitsLut[d4];
+ *buffer++ = cDigitsLut[d4 + 1];
+ *buffer++ = cDigitsLut[d5];
+ *buffer++ = cDigitsLut[d5 + 1];
+ *buffer++ = cDigitsLut[d6];
+ *buffer++ = cDigitsLut[d6 + 1];
+ *buffer++ = cDigitsLut[d7];
+ *buffer++ = cDigitsLut[d7 + 1];
+ *buffer++ = cDigitsLut[d8];
+ *buffer++ = cDigitsLut[d8 + 1];
+ }
+
+ return buffer;
+}
+
+inline char* i64toa(int64_t value, char* buffer) { // Signed wrapper around u64toa: writes '-' for negatives, then the magnitude
+ uint64_t u = static_cast<uint64_t>(value);
+ if (value < 0) {
+ *buffer++ = '-';
+ u = ~u + 1; // negate in unsigned arithmetic, so no signed negation of value is performed
+ }
+
+ return u64toa(u, buffer);
+}
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_ITOA_
diff --git a/contrib/libs/rapidjson/include/rapidjson/internal/strfunc.h b/contrib/libs/rapidjson/include/rapidjson/internal/strfunc.h
new file mode 100644
index 0000000000..226439a767
--- /dev/null
+++ b/contrib/libs/rapidjson/include/rapidjson/internal/strfunc.h
@@ -0,0 +1,69 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_INTERNAL_STRFUNC_H_
+#define RAPIDJSON_INTERNAL_STRFUNC_H_
+
+#include "../stream.h"
+#include <cwchar>
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+//! Custom strlen() which works on different character types.
+/*! \tparam Ch Character type (e.g. char, wchar_t, short)
+ \param s Null-terminated input string.
+ \return Number of characters in the string.
+ \note This has the same semantics as strlen(), the return value is not number of Unicode codepoints.
+*/
+template <typename Ch>
+inline SizeType StrLen(const Ch* s) { // Generic fallback: counts characters up to the first zero element
+ RAPIDJSON_ASSERT(s != 0); // a null pointer is rejected by assertion
+ const Ch* p = s;
+ while (*p) ++p; // advance to the terminating zero
+ return SizeType(p - s); // distance in Ch units, converted to SizeType
+}
+
+template <>
+inline SizeType StrLen(const char* s) { // Specialization for char: delegates to std::strlen (no null-pointer assertion on this path)
+ return SizeType(std::strlen(s));
+}
+
+template <>
+inline SizeType StrLen(const wchar_t* s) { // Specialization for wchar_t: delegates to std::wcslen (no null-pointer assertion on this path)
+ return SizeType(std::wcslen(s));
+}
+
+//! Returns number of code points in an encoded string.
+template<typename Encoding>
+bool CountStringCodePoint(const typename Encoding::Ch* s, SizeType length, SizeType* outCount) { // Decodes the first length characters of s; on success stores the code point count in *outCount and returns true
+ RAPIDJSON_ASSERT(s != 0);
+ RAPIDJSON_ASSERT(outCount != 0);
+ GenericStringStream<Encoding> is(s);
+ const typename Encoding::Ch* end = s + length; // decoding stops at this position rather than at a terminator
+ SizeType count = 0;
+ while (is.src_ < end) { // is.src_ is the stream's read position, advanced by Decode
+ unsigned codepoint; // decoded value is not used; only the number of successful decodes matters
+ if (!Encoding::Decode(is, &codepoint))
+ return false; // decode failure: *outCount is left unmodified
+ count++;
+ }
+ *outCount = count;
+ return true;
+}
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_INTERNAL_STRFUNC_H_
diff --git a/contrib/libs/rapidjson/include/rapidjson/stringbuffer.h b/contrib/libs/rapidjson/include/rapidjson/stringbuffer.h
new file mode 100644
index 0000000000..4e38b82c3d
--- /dev/null
+++ b/contrib/libs/rapidjson/include/rapidjson/stringbuffer.h
@@ -0,0 +1,121 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_STRINGBUFFER_H_
+#define RAPIDJSON_STRINGBUFFER_H_
+
+#include "stream.h"
+#include "internal/stack.h"
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+#include <utility> // std::move
+#endif
+
+#include "internal/stack.h"
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+//! Represents an in-memory output stream.
+/*!
+ \tparam Encoding Encoding of the stream.
+ \tparam Allocator type for allocating memory buffer.
+ \note implements Stream concept
+*/
+template <typename Encoding, typename Allocator = CrtAllocator>
+class GenericStringBuffer { // Output stream that appends Ch values to an internal::Stack (stack_)
+public:
+ typedef typename Encoding::Ch Ch;
+
+ GenericStringBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {} // both arguments are passed straight to the stack
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+ GenericStringBuffer(GenericStringBuffer&& rhs) : stack_(std::move(rhs.stack_)) {}
+ GenericStringBuffer& operator=(GenericStringBuffer&& rhs) {
+ if (&rhs != this) // guard against self-move
+ stack_ = std::move(rhs.stack_);
+ return *this;
+ }
+#endif
+
+ void Put(Ch c) { *stack_.template Push<Ch>() = c; } // appends one character
+ void PutUnsafe(Ch c) { *stack_.template PushUnsafe<Ch>() = c; } // appends via PushUnsafe; presumably expects a prior Reserve() - confirm in internal/stack.h
+ void Flush() {} // intentionally empty
+
+ void Clear() { stack_.Clear(); }
+ void ShrinkToFit() {
+ // Push a null terminator, shrink, then pop it again, so the terminator is included when shrinking.
+ *stack_.template Push<Ch>() = '\0';
+ stack_.ShrinkToFit();
+ stack_.template Pop<Ch>(1);
+ }
+
+ void Reserve(size_t count) { stack_.template Reserve<Ch>(count); }
+ Ch* Push(size_t count) { return stack_.template Push<Ch>(count); } // returns the pointer produced by the stack for count new characters
+ Ch* PushUnsafe(size_t count) { return stack_.template PushUnsafe<Ch>(count); }
+ void Pop(size_t count) { stack_.template Pop<Ch>(count); }
+
+ const Ch* GetString() const {
+ // Push and pop a null terminator: the '\0' is written just past the content but not counted in the size.
+ *stack_.template Push<Ch>() = '\0'; // allowed in a const method because stack_ is declared mutable
+ stack_.template Pop<Ch>(1);
+
+ return stack_.template Bottom<Ch>();
+ }
+
+ //! Get the size of string in bytes in the string buffer.
+ size_t GetSize() const { return stack_.GetSize(); }
+
+ //! Get the length of string in Ch in the string buffer.
+ size_t GetLength() const { return stack_.GetSize() / sizeof(Ch); }
+
+ static const size_t kDefaultCapacity = 256; // default for the constructor's capacity argument
+ mutable internal::Stack<Allocator> stack_; // public and mutable: GetString() const writes through it, and the PutN specialization in this header accesses it directly
+
+private:
+ // Prohibit copy constructor & assignment operator.
+ GenericStringBuffer(const GenericStringBuffer&);
+ GenericStringBuffer& operator=(const GenericStringBuffer&);
+};
+
+//! String buffer with UTF8 encoding
+typedef GenericStringBuffer<UTF8<> > StringBuffer;
+
+template<typename Encoding, typename Allocator>
+inline void PutReserve(GenericStringBuffer<Encoding, Allocator>& stream, size_t count) { // PutReserve overload for GenericStringBuffer: forwards to the member Reserve()
+ stream.Reserve(count);
+}
+
+template<typename Encoding, typename Allocator>
+inline void PutUnsafe(GenericStringBuffer<Encoding, Allocator>& stream, typename Encoding::Ch c) { // PutUnsafe overload for GenericStringBuffer: forwards to the member PutUnsafe()
+ stream.PutUnsafe(c);
+}
+
+//! Implement specialized version of PutN() with memset() for better performance.
+template<>
+inline void PutN(GenericStringBuffer<UTF8<> >& stream, char c, size_t n) { // Specialization for the UTF-8 string buffer: appends n copies of c in one memset
+ std::memset(stream.stack_.Push<char>(n), c, n * sizeof(c)); // Push<char>(n) supplies the destination for the n characters
+}
+
+RAPIDJSON_NAMESPACE_END
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_STRINGBUFFER_H_
diff --git a/contrib/libs/rapidjson/include/rapidjson/writer.h b/contrib/libs/rapidjson/include/rapidjson/writer.h
new file mode 100644
index 0000000000..68e14d9fd2
--- /dev/null
+++ b/contrib/libs/rapidjson/include/rapidjson/writer.h
@@ -0,0 +1,640 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_WRITER_H_
+#define RAPIDJSON_WRITER_H_
+
+#include "stream.h"
+#include "internal/meta.h"
+#include "internal/stack.h"
+#include "internal/strfunc.h"
+#include "internal/dtoa.h"
+#include "internal/itoa.h"
+#include "stringbuffer.h"
+#include <new> // placement new
+
+#if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
+#include <intrin.h>
+#pragma intrinsic(_BitScanForward)
+#endif
+#ifdef RAPIDJSON_SSE42
+#include <nmmintrin.h>
+#elif defined(RAPIDJSON_SSE2)
+#include <emmintrin.h>
+#endif
+
+#ifdef _MSC_VER
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
+#endif
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+RAPIDJSON_DIAG_OFF(unreachable-code)
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+///////////////////////////////////////////////////////////////////////////////
+// WriteFlag
+
+/*! \def RAPIDJSON_WRITE_DEFAULT_FLAGS
+ \ingroup RAPIDJSON_CONFIG
+ \brief User-defined kWriteDefaultFlags definition.
+
+ User can define this as any combination of \c WriteFlag values.
+*/
+#ifndef RAPIDJSON_WRITE_DEFAULT_FLAGS
+#define RAPIDJSON_WRITE_DEFAULT_FLAGS kWriteNoFlags
+#endif
+
+//! Bit flags that tune Writer behaviour; combine them with bitwise OR.
+enum WriteFlag {
+    kWriteNoFlags              = 0,       //!< No flag is set.
+    kWriteValidateEncodingFlag = 1 << 0,  //!< Validate the encoding of JSON strings.
+    kWriteNanAndInfFlag        = 1 << 1,  //!< Permit writing Infinity, -Infinity and NaN.
+    kWriteNoEscapeSlashFlag    = 1 << 2,  //!< Do not escape '/'.
+    kWriteDefaultFlags = RAPIDJSON_WRITE_DEFAULT_FLAGS  //!< Default flags; customize by defining RAPIDJSON_WRITE_DEFAULT_FLAGS.
+};
+
+//! JSON writer
+/*! Writer implements the concept Handler.
+ It generates JSON text by events to an output os.
+
+ User may programmatically call the functions of a writer to generate JSON text.
+
+ On the other hand, a writer can also be passed to objects that generate events,
+
+ for example Reader::Parse() and Document::Accept().
+
+ \tparam OutputStream Type of output stream.
+ \tparam SourceEncoding Encoding of source string.
+ \tparam TargetEncoding Encoding of output stream.
+ \tparam StackAllocator Type of allocator for allocating memory of stack.
+ \note implements Handler concept
+*/
+template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags>
+class Writer {
+public:
+ typedef typename SourceEncoding::Ch Ch;
+
+ //! Initial value of the maximum-decimal-places setting; see SetMaxDecimalPlaces().
+ static const int kDefaultMaxDecimalPlaces = 324;
+
+ //! Constructor binding the writer to an output stream.
+ /*! \param os Output stream; the writer keeps a pointer to it.
+ \param stackAllocator User supplied allocator. If it is null, it will create a private one.
+ \param levelDepth Initial capacity of the nesting-level stack, counted in levels (passed on as levelDepth * sizeof(Level) bytes).
+ */
+ explicit
+ Writer(OutputStream& os, StackAllocator* stackAllocator = 0, size_t levelDepth = kDefaultLevelDepth) :
+ os_(&os), level_stack_(stackAllocator, levelDepth * sizeof(Level)), maxDecimalPlaces_(kDefaultMaxDecimalPlaces), hasRoot_(false) {}
+
+ //! Constructor without an output stream.
+ /*! The stream pointer starts out null. Attach a stream with Reset() before writing
+ any value, because the writing functions dereference the stream pointer.
+ \param allocator User supplied allocator for the nesting-level stack.
+ \param levelDepth Initial capacity of the nesting-level stack, counted in levels.
+ */
+ explicit
+ Writer(StackAllocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) :
+ os_(0), level_stack_(allocator, levelDepth * sizeof(Level)), maxDecimalPlaces_(kDefaultMaxDecimalPlaces), hasRoot_(false) {}
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+ //! Move constructor.
+ /*! Takes over the stream pointer, the nesting-level stack and the settings of \c rhs.
+ \c rhs is left with a null stream pointer.
+ */
+ Writer(Writer&& rhs) :
+ os_(rhs.os_), level_stack_(std::move(rhs.level_stack_)), maxDecimalPlaces_(rhs.maxDecimalPlaces_), hasRoot_(rhs.hasRoot_) {
+ rhs.os_ = 0;
+ }
+#endif
+
+    //! Rebind the writer to another output stream so it can emit a further JSON text.
+    /*!
+        Points the writer at \c os, forgets that a root value was written and
+        empties the nesting-level stack. The maximum-decimal-places setting is
+        not touched.
+
+        \param os New output stream.
+        \code
+        Writer<OutputStream> writer(os1);
+        writer.StartObject();
+        // ...
+        writer.EndObject();
+
+        writer.Reset(os2);
+        writer.StartObject();
+        // ...
+        writer.EndObject();
+        \endcode
+    */
+    void Reset(OutputStream& os) {
+        level_stack_.Clear();
+        hasRoot_ = false;
+        os_ = &os;
+    }
+
+    //! Tells whether the output forms a complete JSON text.
+    /*!
+        The output counts as complete once a root value has been started and
+        no object or array remains open.
+    */
+    bool IsComplete() const {
+        if (!hasRoot_)
+            return false;
+        return level_stack_.Empty();
+    }
+
+ //! Returns the current maximum number of decimal places used for double output.
+ int GetMaxDecimalPlaces() const {
+ return maxDecimalPlaces_;
+ }
+
+ //! Sets the maximum number of decimal places for double output.
+ /*!
+ This setting truncates the output to the specified number of decimal places.
+
+ For example,
+
+ \code
+ writer.SetMaxDecimalPlaces(3);
+ writer.StartArray();
+ writer.Double(0.12345); // "0.123"
+ writer.Double(0.0001); // "0.0"
+ writer.Double(1.234567890123456e30); // "1.234567890123456e30" (do not truncate significand for positive exponent)
+ writer.Double(1.23e-4); // "0.0" (do truncate significand for negative exponent)
+ writer.EndArray();
+ \endcode
+
+ The default setting does not truncate any decimal places. You can restore this setting by calling
+ \code
+ writer.SetMaxDecimalPlaces(Writer::kDefaultMaxDecimalPlaces);
+ \endcode
+
+ \param maxDecimalPlaces Stored as given; WriteDouble() hands it to internal::dtoa().
+ */
+ void SetMaxDecimalPlaces(int maxDecimalPlaces) {
+ maxDecimalPlaces_ = maxDecimalPlaces;
+ }
+
+ /*!@name Implementation of Handler
+ \see Handler
+ */
+ //@{
+
+    //! Handler event: writes \c null.
+    bool Null() {
+        Prefix(kNullType);
+        const bool written = WriteNull();
+        return EndValue(written);
+    }
+
+    //! Handler event: writes \c true or \c false.
+    bool Bool(bool b) {
+        if (b)
+            Prefix(kTrueType);
+        else
+            Prefix(kFalseType);
+        const bool written = WriteBool(b);
+        return EndValue(written);
+    }
+
+    //! Handler event: writes a signed integer.
+    bool Int(int i) {
+        Prefix(kNumberType);
+        const bool written = WriteInt(i);
+        return EndValue(written);
+    }
+
+    //! Handler event: writes an unsigned integer.
+    bool Uint(unsigned u) {
+        Prefix(kNumberType);
+        const bool written = WriteUint(u);
+        return EndValue(written);
+    }
+
+    //! Handler event: writes a signed 64-bit integer.
+    bool Int64(int64_t i64) {
+        Prefix(kNumberType);
+        const bool written = WriteInt64(i64);
+        return EndValue(written);
+    }
+
+    //! Handler event: writes an unsigned 64-bit integer.
+    bool Uint64(uint64_t u64) {
+        Prefix(kNumberType);
+        const bool written = WriteUint64(u64);
+        return EndValue(written);
+    }
+
+    //! Handler event: writes a \c double value to the stream.
+    /*!
+        \param d The value to be written.
+        \return The result of WriteDouble(), i.e. whether the value could be written.
+    */
+    bool Double(double d) {
+        Prefix(kNumberType);
+        const bool written = WriteDouble(d);
+        return EndValue(written);
+    }
+
+    //! Handler event for a number supplied as text.
+    /*!
+        \param str Characters of the number; must not be null.
+        \param length Number of characters in \c str.
+        \param copy Not used by the writer.
+        \return The result of WriteString().
+        \note The text goes through WriteString(), so it is emitted enclosed in
+              double quotes and escaped like any other string.
+    */
+    bool RawNumber(const Ch* str, SizeType length, bool copy = false) {
+        (void)copy;
+        RAPIDJSON_ASSERT(str != 0);
+        Prefix(kNumberType);
+        const bool written = WriteString(str, length);
+        return EndValue(written);
+    }
+
+    //! Handler event: writes a string value (or an object member name).
+    /*!
+        \param str Characters of the string; must not be null.
+        \param length Number of characters in \c str.
+        \param copy Not used by the writer.
+        \return The result of WriteString().
+    */
+    bool String(const Ch* str, SizeType length, bool copy = false) {
+        (void)copy;
+        RAPIDJSON_ASSERT(str != 0);
+        Prefix(kStringType);
+        const bool written = WriteString(str, length);
+        return EndValue(written);
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+ //! Overload for std::basic_string: forwards data() and size() (converted to SizeType) to String(const Ch*, SizeType, bool).
+ bool String(const std::basic_string<Ch>& str) {
+ return String(str.data(), SizeType(str.size()));
+ }
+#endif
+
+    //! Handler event: opens a JSON object.
+    /*! Emits any pending separator, records a new object level and writes '{'. */
+    bool StartObject() {
+        Prefix(kObjectType);
+        Level* const slot = level_stack_.template Push<Level>();
+        new (slot) Level(false);  // false: the new level is an object, not an array
+        return WriteStartObject();
+    }
+
+ //! Handler event for an object member name; it is written exactly like String().
+ bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); }
+
+ //! Handler event: closes the innermost open object.
+ /*! \param memberCount Not used by the writer.
+ \return The result of writing '}', passed through EndValue().
+ */
+ bool EndObject(SizeType memberCount = 0) {
+ (void)memberCount;
+ RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); // fails when no object or array is open
+ RAPIDJSON_ASSERT(!level_stack_.template Top<Level>()->inArray); // fails when the innermost level is an array, not an object
+ RAPIDJSON_ASSERT(0 == level_stack_.template Top<Level>()->valueCount % 2); // fails when a key was written without its value
+ level_stack_.template Pop<Level>(1);
+ return EndValue(WriteEndObject());
+ }
+
+    //! Handler event: opens a JSON array.
+    /*! Emits any pending separator, records a new array level and writes '['. */
+    bool StartArray() {
+        Prefix(kArrayType);
+        Level* const slot = level_stack_.template Push<Level>();
+        new (slot) Level(true);  // true: the new level is an array
+        return WriteStartArray();
+    }
+
+ //! Handler event: closes the innermost open array.
+ /*! \param elementCount Not used by the writer.
+ \return The result of writing ']', passed through EndValue().
+ */
+ bool EndArray(SizeType elementCount = 0) {
+ (void)elementCount;
+ RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); // fails when no object or array is open
+ RAPIDJSON_ASSERT(level_stack_.template Top<Level>()->inArray); // fails when the innermost level is an object, not an array
+ level_stack_.template Pop<Level>(1);
+ return EndValue(WriteEndArray());
+ }
+ //@}
+
+ /*! @name Convenience extensions */
+ //@{
+
+ //! Simpler but slower overloads: the length is obtained with internal::StrLen(str)
+ //! and the call is forwarded to the (pointer, length) overload.
+ bool String(const Ch* const& str) { return String(str, internal::StrLen(str)); }
+ bool Key(const Ch* const& str) { return Key(str, internal::StrLen(str)); }
+
+ //@}
+
+    //! Write an already serialized JSON value verbatim.
+    /*!
+        Lets the user insert stringified JSON as a value.
+
+        \param json A well-formed JSON value; must not be null and should not contain a null character within the [0, length - 1] range.
+        \param length Length of \c json.
+        \param type Type of the root of \c json.
+        \return The result of WriteRawValue().
+    */
+    bool RawValue(const Ch* json, size_t length, Type type) {
+        RAPIDJSON_ASSERT(json != 0);
+        Prefix(type);
+        const bool written = WriteRawValue(json, length);
+        return EndValue(written);
+    }
+
+ //! Flush the output stream.
+ /*!
+ Allows the user to flush the output stream immediately.
+ Forwards to Flush() of the attached stream, so a stream must be attached.
+ */
+ void Flush() {
+ os_->Flush();
+ }
+
+protected:
+ //! Information for each nested level
+ /*! One Level is pushed by StartObject()/StartArray() and popped by EndObject()/EndArray(). */
+ struct Level {
+ Level(bool inArray_) : valueCount(0), inArray(inArray_) {}
+ size_t valueCount; //!< number of values in this level (in an object, keys and values are counted separately)
+ bool inArray; //!< true if in array, otherwise in object
+ };
+
+ //! Default initial capacity of the nesting-level stack, counted in levels.
+ static const size_t kDefaultLevelDepth = 32;
+
+    //! Emits the four characters of the literal \c null. Always returns true.
+    bool WriteNull() {
+        static const char kLiteral[4] = { 'n', 'u', 'l', 'l' };
+        PutReserve(*os_, 4);
+        for (size_t i = 0; i < 4; ++i)
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(kLiteral[i]));
+        return true;
+    }
+
+    //! Emits the literal \c true or \c false. Always returns true.
+    bool WriteBool(bool b) {
+        static const char kTrue[4] = { 't', 'r', 'u', 'e' };
+        static const char kFalse[5] = { 'f', 'a', 'l', 's', 'e' };
+        const char* const text = b ? kTrue : kFalse;
+        const size_t count = b ? sizeof(kTrue) : sizeof(kFalse);
+        PutReserve(*os_, count);
+        for (size_t i = 0; i < count; ++i)
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(text[i]));
+        return true;
+    }
+
+    //! Formats \c i with internal::i32toa() into a local buffer and copies the result to the stream.
+    bool WriteInt(int i) {
+        char digits[11];  // room for a sign and ten digits
+        const char* const stop = internal::i32toa(i, digits);
+        const size_t count = static_cast<size_t>(stop - digits);
+        PutReserve(*os_, count);
+        for (size_t k = 0; k < count; ++k)
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(digits[k]));
+        return true;
+    }
+
+    //! Formats \c u with internal::u32toa() into a local buffer and copies the result to the stream.
+    bool WriteUint(unsigned u) {
+        char digits[10];  // room for ten digits
+        const char* const stop = internal::u32toa(u, digits);
+        const size_t count = static_cast<size_t>(stop - digits);
+        PutReserve(*os_, count);
+        for (size_t k = 0; k < count; ++k)
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(digits[k]));
+        return true;
+    }
+
+    //! Formats \c i64 with internal::i64toa() into a local buffer and copies the result to the stream.
+    bool WriteInt64(int64_t i64) {
+        char digits[21];
+        const char* const stop = internal::i64toa(i64, digits);
+        const size_t count = static_cast<size_t>(stop - digits);
+        PutReserve(*os_, count);
+        for (size_t k = 0; k < count; ++k)
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(digits[k]));
+        return true;
+    }
+
+    //! Formats \c u64 with internal::u64toa() into a local buffer and copies the result to the stream.
+    bool WriteUint64(uint64_t u64) {
+        char digits[20];  // room for twenty digits
+        char* const stop = internal::u64toa(u64, digits);
+        const size_t count = static_cast<size_t>(stop - digits);
+        PutReserve(*os_, count);
+        for (size_t k = 0; k < count; ++k)
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(digits[k]));
+        return true;
+    }
+
+ //! Writes a double as text.
+ /*!
+ NaN and infinities are written as \c NaN, \c Infinity and \c -Infinity, but only when
+ \c kWriteNanAndInfFlag is set in \c writeFlags. Every other value is formatted by
+ internal::dtoa() using \c maxDecimalPlaces_.
+ \param d The value to be written.
+ \return false (nothing written) if \c d is NaN or infinite and \c kWriteNanAndInfFlag is not set; true otherwise.
+ */
+ bool WriteDouble(double d) {
+ if (internal::Double(d).IsNanOrInf()) {
+ if (!(writeFlags & kWriteNanAndInfFlag))
+ return false;
+ if (internal::Double(d).IsNan()) {
+ PutReserve(*os_, 3);
+ PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N');
+ return true;
+ }
+ // Infinity: reserve 9 characters when a leading '-' is needed, 8 otherwise.
+ if (internal::Double(d).Sign()) {
+ PutReserve(*os_, 9);
+ PutUnsafe(*os_, '-');
+ }
+ else
+ PutReserve(*os_, 8);
+ PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f');
+ PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y');
+ return true;
+ }
+
+ // Finite value: format into a local buffer, then copy the characters to the stream.
+ char buffer[25];
+ char* end = internal::dtoa(d, buffer, maxDecimalPlaces_);
+ PutReserve(*os_, static_cast<size_t>(end - buffer));
+ for (char* p = buffer; p != end; ++p)
+ PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
+ return true;
+ }
+
+    //! Writes a string as a quoted, escaped JSON string.
+    /*!
+        Escaping rules applied to each input character:
+        - if the target encoding does not support Unicode, every character >= 0x80 is
+          written as \c \\uXXXX (a surrogate pair for code points above 0xFFFF);
+        - characters flagged in the \c escape table (control characters, '"' and '\\')
+          are written as a short escape or as \c \\u00XX;
+        - '/' is written as \c \\/ unless \c kWriteNoEscapeSlashFlag is set;
+        - anything else is transcoded to the target encoding, with validation when
+          \c kWriteValidateEncodingFlag is set.
+
+        \param str Source characters; may contain embedded null characters.
+        \param length Number of characters in \c str.
+        \return false if decoding, validating or transcoding a character fails
+                (the output is then left incomplete); true otherwise.
+    */
+    bool WriteString(const Ch* str, SizeType length) {
+        static const typename OutputStream::Ch hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+        // escape[c] is 0 when c needs no escaping, 'u' when it needs \u00XX,
+        // otherwise the character that follows the backslash.
+        static const char escape[256] = {
+#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+            //0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
+            'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'b', 't', 'n', 'u', 'f', 'r', 'u', 'u', // 00
+            'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', // 10
+              0,   0, '"',   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, // 20
+            Z16, Z16,                                                                       // 30~4F
+              0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,'\\',   0,   0,   0, // 50
+            Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16                                // 60~FF
+#undef Z16
+        };
+
+        // Reserve the worst case up front so that the loop below may use PutUnsafe().
+        // The size is computed in size_t: done in SizeType arithmetic, the product
+        // could wrap around for a very long string and reserve too little.
+        const size_t inputLength = static_cast<size_t>(length);
+        if (TargetEncoding::supportUnicode)
+            PutReserve(*os_, 2 + inputLength * 6); // "\uxxxx..."
+        else
+            PutReserve(*os_, 2 + inputLength * 12); // "\uxxxx\uyyyy..."
+
+        PutUnsafe(*os_, '\"');
+        GenericStringStream<SourceEncoding> is(str);
+        while (ScanWriteUnescapedString(is, length)) {
+            const Ch c = is.Peek();
+            if (!TargetEncoding::supportUnicode && static_cast<unsigned>(c) >= 0x80) {
+                // Unicode escaping
+                unsigned codepoint;
+                if (RAPIDJSON_UNLIKELY(!SourceEncoding::Decode(is, &codepoint)))
+                    return false;
+                PutUnsafe(*os_, '\\');
+                PutUnsafe(*os_, 'u');
+                if (codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF)) {
+                    PutUnsafe(*os_, hexDigits[(codepoint >> 12) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint >>  8) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint >>  4) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint      ) & 15]);
+                }
+                else {
+                    RAPIDJSON_ASSERT(codepoint >= 0x010000 && codepoint <= 0x10FFFF);
+                    // Surrogate pair
+                    unsigned s = codepoint - 0x010000;
+                    unsigned lead = (s >> 10) + 0xD800;
+                    unsigned trail = (s & 0x3FF) + 0xDC00;
+                    PutUnsafe(*os_, hexDigits[(lead >> 12) & 15]);
+                    PutUnsafe(*os_, hexDigits[(lead >>  8) & 15]);
+                    PutUnsafe(*os_, hexDigits[(lead >>  4) & 15]);
+                    PutUnsafe(*os_, hexDigits[(lead      ) & 15]);
+                    PutUnsafe(*os_, '\\');
+                    PutUnsafe(*os_, 'u');
+                    PutUnsafe(*os_, hexDigits[(trail >> 12) & 15]);
+                    PutUnsafe(*os_, hexDigits[(trail >>  8) & 15]);
+                    PutUnsafe(*os_, hexDigits[(trail >>  4) & 15]);
+                    PutUnsafe(*os_, hexDigits[(trail      ) & 15]);
+                }
+            }
+            else if ((sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256) && RAPIDJSON_UNLIKELY(escape[static_cast<unsigned char>(c)])) {
+                // Character listed in the escape table
+                is.Take();
+                PutUnsafe(*os_, '\\');
+                PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(escape[static_cast<unsigned char>(c)]));
+                if (escape[static_cast<unsigned char>(c)] == 'u') {
+                    PutUnsafe(*os_, '0');
+                    PutUnsafe(*os_, '0');
+                    PutUnsafe(*os_, hexDigits[static_cast<unsigned char>(c) >> 4]);
+                    PutUnsafe(*os_, hexDigits[static_cast<unsigned char>(c) & 0xF]);
+                }
+            }
+            else if (RAPIDJSON_UNLIKELY(c == '/' && !(writeFlags & kWriteNoEscapeSlashFlag))) {
+                // Slash is escaped unless the flag disables it
+                is.Take();
+                PutUnsafe(*os_, '\\');
+                PutUnsafe(*os_, '/');
+            }
+            else if (RAPIDJSON_UNLIKELY(!(writeFlags & kWriteValidateEncodingFlag ?
+                Transcoder<SourceEncoding, TargetEncoding>::Validate(is, *os_) :
+                Transcoder<SourceEncoding, TargetEncoding>::TranscodeUnsafe(is, *os_))))
+                return false;
+        }
+        PutUnsafe(*os_, '\"');
+        return true;
+    }
+
+ //! Generic scan step used by WriteString().
+ /*! This version copies nothing; it only reports whether unread input remains,
+ i.e. whether the read position of \c is is still below \c length.
+ */
+ bool ScanWriteUnescapedString(GenericStringStream<SourceEncoding>& is, size_t length) {
+ return RAPIDJSON_LIKELY(is.Tell() < length);
+ }
+
+ // Structural characters: each helper puts one character on the stream and returns true.
+ bool WriteStartObject() { os_->Put('{'); return true; }
+ bool WriteEndObject() { os_->Put('}'); return true; }
+ bool WriteStartArray() { os_->Put('['); return true; }
+ bool WriteEndArray() { os_->Put(']'); return true; }
+
+    //! Copies \c length characters of \c json to the stream unchanged. Always returns true.
+    /*! Asserts that none of the copied characters is a null character. */
+    bool WriteRawValue(const Ch* json, size_t length) {
+        PutReserve(*os_, length);
+        const Ch* const last = json + length;
+        for (const Ch* cursor = json; cursor != last; ++cursor) {
+            RAPIDJSON_ASSERT(*cursor != '\0');
+            PutUnsafe(*os_, *cursor);
+        }
+        return true;
+    }
+
+ //! Emits the separator required before the next value and updates the bookkeeping.
+ /*!
+ Inside an array a ',' precedes every element but the first. Inside an object,
+ keys and values are counted together: a ',' precedes every key but the first
+ and a ':' precedes every value. At root level only \c hasRoot_ is set.
+ \param type Type of the value about to be written; only used in an assertion.
+ */
+ void Prefix(Type type) {
+ (void)type;
+ if (RAPIDJSON_LIKELY(level_stack_.GetSize() != 0)) { // this value is not at root
+ Level* level = level_stack_.template Top<Level>();
+ if (level->valueCount > 0) {
+ if (level->inArray)
+ os_->Put(','); // add comma if it is not the first element in array
+ else // in object
+ os_->Put((level->valueCount % 2 == 0) ? ',' : ':'); // even count: next item is a key; odd count: next item is a value
+ }
+ if (!level->inArray && level->valueCount % 2 == 0)
+ RAPIDJSON_ASSERT(type == kStringType); // in an object, every even-numbered item must be a name
+ level->valueCount++;
+ }
+ else {
+ RAPIDJSON_ASSERT(!hasRoot_); // There must be exactly one root value.
+ hasRoot_ = true;
+ }
+ }
+
+ // Called after each value has been written: flushes the stream when the value
+ // was the top-level one (no level left open), then passes \c ret through unchanged.
+ bool EndValue(bool ret) {
+ if (RAPIDJSON_UNLIKELY(level_stack_.Empty())) // end of json text
+ Flush();
+ return ret;
+ }
+
+ OutputStream* os_; //!< attached output stream; null until a stream is supplied
+ internal::Stack<StackAllocator> level_stack_; //!< one Level per currently open object/array
+ int maxDecimalPlaces_; //!< see SetMaxDecimalPlaces()
+ bool hasRoot_; //!< true once a root value has been started
+
+private:
+ // Prohibit copy constructor & assignment operator.
+ Writer(const Writer&);
+ Writer& operator=(const Writer&);
+};
+
+// Full specializations for Writer<StringBuffer> to prevent memory copying
+
+//! Writer<StringBuffer>: formats the number directly inside the string buffer.
+template<>
+inline bool Writer<StringBuffer>::WriteInt(int i) {
+    const size_t kMaxChars = 11;
+    char* const first = os_->Push(kMaxChars);
+    const char* const last = internal::i32toa(i, first);
+    const size_t used = static_cast<size_t>(last - first);
+    os_->Pop(kMaxChars - used);  // hand back the unused tail
+    return true;
+}
+
+//! Writer<StringBuffer>: formats the number directly inside the string buffer.
+template<>
+inline bool Writer<StringBuffer>::WriteUint(unsigned u) {
+    const size_t kMaxChars = 10;
+    char* const first = os_->Push(kMaxChars);
+    const char* const last = internal::u32toa(u, first);
+    const size_t used = static_cast<size_t>(last - first);
+    os_->Pop(kMaxChars - used);  // hand back the unused tail
+    return true;
+}
+
+//! Writer<StringBuffer>: formats the number directly inside the string buffer.
+template<>
+inline bool Writer<StringBuffer>::WriteInt64(int64_t i64) {
+    const size_t kMaxChars = 21;
+    char* const first = os_->Push(kMaxChars);
+    const char* const last = internal::i64toa(i64, first);
+    const size_t used = static_cast<size_t>(last - first);
+    os_->Pop(kMaxChars - used);  // hand back the unused tail
+    return true;
+}
+
+//! Writer<StringBuffer>: formats the number directly inside the string buffer.
+template<>
+inline bool Writer<StringBuffer>::WriteUint64(uint64_t u) {
+    const size_t kMaxChars = 20;
+    char* const first = os_->Push(kMaxChars);
+    const char* const last = internal::u64toa(u, first);
+    const size_t used = static_cast<size_t>(last - first);
+    os_->Pop(kMaxChars - used);  // hand back the unused tail
+    return true;
+}
+
+//! Writer<StringBuffer>: same output as the generic WriteDouble(), but finite values
+//! are formatted directly inside the string buffer instead of a local array.
+template<>
+inline bool Writer<StringBuffer>::WriteDouble(double d) {
+ if (internal::Double(d).IsNanOrInf()) {
+ // Note: This code path can only be reached if (RAPIDJSON_WRITE_DEFAULT_FLAGS & kWriteNanAndInfFlag).
+ // kWriteDefaultFlags is tested here because Writer<StringBuffer> uses the default write flags.
+ if (!(kWriteDefaultFlags & kWriteNanAndInfFlag))
+ return false;
+ if (internal::Double(d).IsNan()) {
+ PutReserve(*os_, 3);
+ PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N');
+ return true;
+ }
+ // Infinity: reserve 9 characters when a leading '-' is needed, 8 otherwise.
+ if (internal::Double(d).Sign()) {
+ PutReserve(*os_, 9);
+ PutUnsafe(*os_, '-');
+ }
+ else
+ PutReserve(*os_, 8);
+ PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f');
+ PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y');
+ return true;
+ }
+
+ // Push 25 characters, format in place, then pop whatever dtoa() did not use.
+ char *buffer = os_->Push(25);
+ char* end = internal::dtoa(d, buffer, maxDecimalPlaces_);
+ os_->Pop(static_cast<size_t>(25 - (end - buffer)));
+ return true;
+}
+
+#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
+//! SIMD scan step for Writer<StringBuffer>.
+/*!
+    Copies the longest run of characters that need no escaping from \c is to the
+    string buffer: first character by character until the read pointer is 16-byte
+    aligned, then 16 characters per iteration. The scan stops at the first
+    character WriteString() has to treat specially: a character below 0x20, '"',
+    '\\' and, unless \c kWriteNoEscapeSlashFlag is part of the default flags, '/'.
+    Output space is not reserved here; WriteString() reserves it beforehand.
+
+    \param is Source stream; its read pointer is advanced past the copied characters.
+    \param length Total length of the string being written.
+    \return true if unread input remains, false when the whole string was consumed.
+*/
+template<>
+inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, size_t length) {
+    // Too short for the SIMD path: behave like the generic version.
+    if (length < 16)
+        return RAPIDJSON_LIKELY(is.Tell() < length);
+
+    if (!RAPIDJSON_LIKELY(is.Tell() < length))
+        return false;
+
+    // Writer<StringBuffer> always uses kWriteDefaultFlags. A '/' has to stop the scan
+    // when slash escaping is enabled; otherwise it would be copied verbatim here
+    // although WriteString() escapes it everywhere else.
+    const bool escapeSlash = !(kWriteDefaultFlags & kWriteNoEscapeSlashFlag);
+
+    const char* p = is.src_;
+    const char* end = is.head_ + length;
+    // p rounded up, and end rounded down, to a multiple of 16
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+    const char* endAligned = reinterpret_cast<const char*>(reinterpret_cast<size_t>(end) & static_cast<size_t>(~15));
+    if (nextAligned > end)
+        return true;  // nothing copied; WriteString() handles the rest one character at a time
+
+    // Scalar copy up to the next aligned address. Where char is signed, bytes >= 0x80
+    // also satisfy *p < 0x20 and are left to WriteString().
+    while (p != nextAligned)
+        if (*p < 0x20 || *p == '\"' || *p == '\\' || (escapeSlash && *p == '/')) {
+            is.src_ = p;
+            return RAPIDJSON_LIKELY(is.Tell() < length);
+        }
+        else
+            os_->PutUnsafe(*p++);
+
+    // The rest of string using SIMD
+    static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
+    static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
+    static const char space[16]  = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
+    static const char slash[16]  = { '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/', '/' };
+    const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
+    const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
+    const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+    const __m128i sl = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&slash[0]));
+
+    for (; p != endAligned; p += 16) {
+        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+        const __m128i t1 = _mm_cmpeq_epi8(s, dq);
+        const __m128i t2 = _mm_cmpeq_epi8(s, bs);
+        const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+        __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+        if (escapeSlash)
+            x = _mm_or_si128(x, _mm_cmpeq_epi8(s, sl));
+        unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
+        if (RAPIDJSON_UNLIKELY(r != 0)) {   // at least one character needs special handling
+            SizeType len;
+#ifdef _MSC_VER         // Find the index of the first such character
+            unsigned long offset;
+            _BitScanForward(&offset, r);
+            len = offset;
+#else
+            len = static_cast<SizeType>(__builtin_ffs(r) - 1);
+#endif
+            // Copy the characters that precede it, then stop.
+            char* q = reinterpret_cast<char*>(os_->PushUnsafe(len));
+            for (size_t i = 0; i < len; i++)
+                q[i] = p[i];
+
+            p += len;
+            break;
+        }
+        // All 16 characters are plain: store them in one go.
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(os_->PushUnsafe(16)), s);
+    }
+
+    is.src_ = p;
+    return RAPIDJSON_LIKELY(is.Tell() < length);
+}
+#endif // defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
+
+RAPIDJSON_NAMESPACE_END
+
+#ifdef _MSC_VER
+RAPIDJSON_DIAG_POP
+#endif
+
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_WRITER_H_