// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "contrib/libs/apache/arrow_next/cpp/src/arrow/pretty_print.h" #include #include #include #include #include #include #include #include // IWYU pragma: keep #include #include #include #include #include "contrib/libs/apache/arrow_next/cpp/src/arrow/array.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/chunked_array.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/record_batch.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/status.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/table.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/type.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/type_traits.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/checked_cast.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/formatting.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/int_util_overflow.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/key_value_metadata.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/util/string.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/vendored/datetime.h" #include "contrib/libs/apache/arrow_next/cpp/src/arrow/visit_array_inline.h" namespace arrow20 { using internal::checked_cast; using internal::StringFormatter; using internal::ToChars; namespace { class PrettyPrinter { public: PrettyPrinter(const PrettyPrintOptions& options, std::ostream* sink) : options_(options), indent_(options.indent), sink_(sink) {} inline void Write(std::string_view data); inline void WriteIndented(std::string_view data); inline void Newline(); inline void Indent(); inline void IndentAfterNewline(); void OpenArray(const Array& array); void CloseArray(const Array& array); void Flush() { (*sink_) << std::flush; } PrettyPrintOptions ChildOptions(bool increment_indent = false) const { PrettyPrintOptions child_options = options_; if (increment_indent) { child_options.indent = indent_ + child_options.indent_size; } else { child_options.indent = indent_; } return child_options; } protected: const PrettyPrintOptions& options_; int indent_; std::ostream* sink_; }; void PrettyPrinter::OpenArray(const Array& array) { if (!options_.skip_new_lines) { Indent(); } (*sink_) << options_.array_delimiters.open; if (array.length() > 0) { Newline(); indent_ += options_.indent_size; } } void PrettyPrinter::CloseArray(const Array& array) { if (array.length() > 0) { indent_ -= options_.indent_size; if (!options_.skip_new_lines) { Indent(); } } (*sink_) << options_.array_delimiters.close; } void PrettyPrinter::Write(std::string_view data) { (*sink_) << data; } void PrettyPrinter::WriteIndented(std::string_view data) { Indent(); Write(data); } void PrettyPrinter::Newline() { if (options_.skip_new_lines) { return; } (*sink_) << "\n"; } void PrettyPrinter::Indent() { for (int i = 0; i < indent_; ++i) { (*sink_) << " "; } } void PrettyPrinter::IndentAfterNewline() { if (options_.skip_new_lines) { return; } Indent(); } class ArrayPrinter : public PrettyPrinter { public: ArrayPrinter(const PrettyPrintOptions& options, std::ostream* sink) : PrettyPrinter(options, sink) {} private: template Status WriteValues(const Array& array, FormatFunction&& func, bool indent_non_null_values = true, bool is_container = false) { // `indent_non_null_values` should be false if `FormatFunction` applies // indentation itself. int window = is_container ? options_.container_window : options_.window; for (int64_t i = 0; i < array.length(); ++i) { const bool is_last = (i == array.length() - 1); // check if `length == 2 * window + 1` to eliminate ellipsis for only one element if ((array.length() != 2 * window + 1) && (i >= window) && (i < (array.length() - window))) { IndentAfterNewline(); (*sink_) << "..."; if (!is_last && options_.skip_new_lines) { (*sink_) << options_.array_delimiters.element; } i = array.length() - window - 1; } else if (array.IsNull(i)) { IndentAfterNewline(); (*sink_) << options_.null_rep; if (!is_last) { (*sink_) << options_.array_delimiters.element; } } else { if (indent_non_null_values) { IndentAfterNewline(); } RETURN_NOT_OK(func(i)); if (!is_last) { (*sink_) << options_.array_delimiters.element; } } Newline(); } return Status::OK(); } template Status WritePrimitiveValues(const ArrayType& array, Formatter* formatter) { auto appender = [&](std::string_view v) { (*sink_) << v; }; auto format_func = [&](int64_t i) { (*formatter)(array.GetView(i), appender); return Status::OK(); }; return WriteValues(array, std::move(format_func)); } template Status WritePrimitiveValues(const ArrayType& array) { StringFormatter formatter{array.type().get()}; return WritePrimitiveValues(array, &formatter); } Status WriteValidityBitmap(const Array& array); Status PrintChildren(const std::vector& fields, int64_t offset, int64_t length) { for (size_t i = 0; i < fields.size(); ++i) { Write("\n"); // Always want newline before child array description Indent(); std::stringstream ss; ss << "-- child " << i << " type: " << fields[i]->type()->ToString() << "\n"; Write(ss.str()); // Indent(); const Array* field = fields[i]; if (offset != 0) { RETURN_NOT_OK( PrettyPrint(*field->Slice(offset, length), ChildOptions(true), sink_)); } else { RETURN_NOT_OK(PrettyPrint(*field, ChildOptions(true), sink_)); } } return Status::OK(); } // // WriteDataValues(): generic function to write values from an array // template enable_if_has_c_type WriteDataValues(const ArrayType& array) { return WritePrimitiveValues(array); } Status WriteDataValues(const HalfFloatArray& array) { // XXX do not know how to format half floats yet StringFormatter formatter{array.type().get()}; return WritePrimitiveValues(array, &formatter); } template enable_if_has_string_view WriteDataValues(const ArrayType& array) { return WriteValues(array, [&](int64_t i) { if constexpr (T::is_utf8) { (*sink_) << "\"" << array.GetView(i) << "\""; } else { (*sink_) << HexEncode(array.GetView(i)); } return Status::OK(); }); } template enable_if_decimal WriteDataValues(const ArrayType& array) { return WriteValues(array, [&](int64_t i) { (*sink_) << array.FormatValue(i); return Status::OK(); }); } template enable_if_t::value || is_list_view_type::value, Status> WriteDataValues(const ArrayType& array) { const auto values = array.values(); const auto child_options = ChildOptions(); ArrayPrinter values_printer(child_options, sink_); return WriteValues( array, [&](int64_t i) { // XXX this could be much faster if ArrayPrinter allowed specifying start and // stop endpoints. return values_printer.Print( *values->Slice(array.value_offset(i), array.value_length(i))); }, /*indent_non_null_values=*/false, /*is_container=*/true); } Status WriteDataValues(const MapArray& array) { const auto keys = array.keys(); const auto items = array.items(); const auto child_options = ChildOptions(); ArrayPrinter values_printer(child_options, sink_); return WriteValues( array, [&](int64_t i) { IndentAfterNewline(); (*sink_) << "keys:"; Newline(); RETURN_NOT_OK(values_printer.Print( *keys->Slice(array.value_offset(i), array.value_length(i)))); Newline(); IndentAfterNewline(); (*sink_) << "values:"; Newline(); RETURN_NOT_OK(values_printer.Print( *items->Slice(array.value_offset(i), array.value_length(i)))); return Status::OK(); }, /*indent_non_null_values=*/false); } public: template enable_if_t::value || std::is_base_of::value || std::is_base_of::value || std::is_base_of::value || std::is_base_of::value || std::is_base_of::value || std::is_base_of::value || std::is_base_of::value || std::is_base_of::value || std::is_base_of::value || std::is_base_of::value, Status> Visit(const T& array) { Status st = array.Validate(); if (!st.ok()) { (*sink_) << ""; return Status::OK(); } OpenArray(array); if (array.length() > 0) { RETURN_NOT_OK(WriteDataValues(array)); } CloseArray(array); return Status::OK(); } Status Visit(const NullArray& array) { (*sink_) << array.length() << " nulls"; return Status::OK(); } Status Visit(const ExtensionArray& array) { return Print(*array.storage()); } Status Visit(const StructArray& array) { RETURN_NOT_OK(WriteValidityBitmap(array)); std::vector children; children.reserve(array.num_fields()); for (int i = 0; i < array.num_fields(); ++i) { children.emplace_back(array.field(i).get()); } return PrintChildren(children, 0, array.length()); } Status Visit(const UnionArray& array) { RETURN_NOT_OK(WriteValidityBitmap(array)); Newline(); Indent(); Write("-- type_ids: "); UInt8Array type_codes(array.length(), array.type_codes(), nullptr, 0, array.offset()); RETURN_NOT_OK(PrettyPrint(type_codes, ChildOptions(true), sink_)); if (array.mode() == UnionMode::DENSE) { Newline(); Indent(); Write("-- value_offsets: "); Int32Array value_offsets( array.length(), checked_cast(array).value_offsets(), nullptr, 0, array.offset()); RETURN_NOT_OK(PrettyPrint(value_offsets, ChildOptions(true), sink_)); } // Print the children without any offset, because the type ids are absolute std::vector children; children.reserve(array.num_fields()); for (int i = 0; i < array.num_fields(); ++i) { children.emplace_back(array.field(i).get()); } return PrintChildren(children, 0, array.length() + array.offset()); } Status Visit(const DictionaryArray& array) { Newline(); Indent(); Write("-- dictionary:\n"); RETURN_NOT_OK(PrettyPrint(*array.dictionary(), ChildOptions(true), sink_)); Newline(); Indent(); Write("-- indices:\n"); return PrettyPrint(*array.indices(), ChildOptions(true), sink_); } Status Visit(const RunEndEncodedArray& array) { Newline(); Indent(); Write("-- run_ends:\n"); RETURN_NOT_OK(PrettyPrint(*array.run_ends(), ChildOptions(true), sink_)); Newline(); Indent(); Write("-- values:\n"); return PrettyPrint(*array.values(), ChildOptions(true), sink_); } Status Print(const Array& array) { if (array.device_type() != DeviceAllocationType::kCPU) { // GH-43055: ideally we only copy start/end slices from non-CPU memory // based on the window size that is being printed ARROW_ASSIGN_OR_RAISE(auto array_cpu, array.ViewOrCopyTo(default_cpu_memory_manager())); RETURN_NOT_OK(VisitArrayInline(*array_cpu, this)); } else { RETURN_NOT_OK(VisitArrayInline(array, this)); } Flush(); return Status::OK(); } }; Status ArrayPrinter::WriteValidityBitmap(const Array& array) { Indent(); Write("-- is_valid:"); if (array.null_count() > 0) { Newline(); Indent(); BooleanArray is_valid(array.length(), array.null_bitmap(), nullptr, 0, array.offset()); return PrettyPrint(is_valid, ChildOptions(true), sink_); } else { Write(" all not null"); return Status::OK(); } } } // namespace Status PrettyPrint(const Array& arr, int indent, std::ostream* sink) { PrettyPrintOptions options; options.indent = indent; ArrayPrinter printer(options, sink); return printer.Print(arr); } Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options, std::ostream* sink) { ArrayPrinter printer(options, sink); return printer.Print(arr); } Status PrettyPrint(const Array& arr, const PrettyPrintOptions& options, std::string* result) { std::ostringstream sink; RETURN_NOT_OK(PrettyPrint(arr, options, &sink)); *result = sink.str(); return Status::OK(); } Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options, std::ostream* sink) { int num_chunks = chunked_arr.num_chunks(); int indent = options.indent; int window = options.container_window; // Struct fields are always on new line bool skip_new_lines = options.skip_new_lines && (chunked_arr.type()->id() != Type::STRUCT); for (int i = 0; i < indent; ++i) { (*sink) << " "; } (*sink) << options.chunked_array_delimiters.open; if (!skip_new_lines) { *sink << "\n"; } bool skip_element_delimiter = true; for (int i = 0; i < num_chunks; ++i) { if (skip_element_delimiter) { skip_element_delimiter = false; } else { (*sink) << options.chunked_array_delimiters.element; if (!skip_new_lines) { *sink << "\n"; } } if ((i >= window) && (i < (num_chunks - window))) { for (int i = 0; i < indent; ++i) { (*sink) << " "; } (*sink) << "..."; (*sink) << options.chunked_array_delimiters.element; if (!skip_new_lines) { *sink << "\n"; } i = num_chunks - window - 1; skip_element_delimiter = true; } else { PrettyPrintOptions chunk_options = options; chunk_options.indent += options.indent_size; ArrayPrinter printer(chunk_options, sink); RETURN_NOT_OK(printer.Print(*chunked_arr.chunk(i))); } } if (!options.skip_new_lines) { *sink << "\n"; } for (int i = 0; i < indent; ++i) { (*sink) << " "; } (*sink) << options.chunked_array_delimiters.close; return Status::OK(); } Status PrettyPrint(const ChunkedArray& chunked_arr, const PrettyPrintOptions& options, std::string* result) { std::ostringstream sink; RETURN_NOT_OK(PrettyPrint(chunked_arr, options, &sink)); *result = sink.str(); return Status::OK(); } Status PrettyPrint(const RecordBatch& batch, int indent, std::ostream* sink) { for (int i = 0; i < batch.num_columns(); ++i) { const std::string& name = batch.column_name(i); (*sink) << name << ": "; RETURN_NOT_OK(PrettyPrint(*batch.column(i), indent + 2, sink)); (*sink) << "\n"; } (*sink) << std::flush; return Status::OK(); } Status PrettyPrint(const RecordBatch& batch, const PrettyPrintOptions& options, std::ostream* sink) { for (int i = 0; i < batch.num_columns(); ++i) { const std::string& name = batch.column_name(i); PrettyPrintOptions column_options = options; column_options.indent += 2; (*sink) << name << ": "; RETURN_NOT_OK(PrettyPrint(*batch.column(i), column_options, sink)); (*sink) << "\n"; } (*sink) << std::flush; return Status::OK(); } Status PrettyPrint(const Table& table, const PrettyPrintOptions& options, std::ostream* sink) { RETURN_NOT_OK(PrettyPrint(*table.schema(), options, sink)); (*sink) << "\n"; (*sink) << "----\n"; PrettyPrintOptions column_options = options; column_options.indent += 2; for (int i = 0; i < table.num_columns(); ++i) { for (int j = 0; j < options.indent; ++j) { (*sink) << " "; } (*sink) << table.schema()->field(i)->name() << ":\n"; RETURN_NOT_OK(PrettyPrint(*table.column(i), column_options, sink)); (*sink) << "\n"; } (*sink) << std::flush; return Status::OK(); } Status DebugPrint(const Array& arr, int indent) { return PrettyPrint(arr, indent, &std::cerr); } namespace { class SchemaPrinter : public PrettyPrinter { public: SchemaPrinter(const Schema& schema, const PrettyPrintOptions& options, std::ostream* sink) : PrettyPrinter(options, sink), schema_(schema) {} Status PrintType(const DataType& type, bool nullable); Status PrintField(const Field& field); void PrintVerboseMetadata(const KeyValueMetadata& metadata) { for (int64_t i = 0; i < metadata.size(); ++i) { Newline(); Indent(); Write(metadata.key(i) + ": '" + metadata.value(i) + "'"); } } void PrintTruncatedMetadata(const KeyValueMetadata& metadata) { for (int64_t i = 0; i < metadata.size(); ++i) { Newline(); Indent(); size_t size = metadata.value(i).size(); size_t truncated_size = std::max(10, 70 - metadata.key(i).size() - indent_); if (size <= truncated_size) { Write(metadata.key(i) + ": '" + metadata.value(i) + "'"); continue; } Write(metadata.key(i) + ": '" + metadata.value(i).substr(0, truncated_size) + "' + " + ToChars(size - truncated_size)); } } void PrintMetadata(const std::string& metadata_type, const KeyValueMetadata& metadata) { if (metadata.size() > 0) { Newline(); Indent(); Write(metadata_type); if (options_.truncate_metadata) { PrintTruncatedMetadata(metadata); } else { PrintVerboseMetadata(metadata); } } } Status Print() { for (int i = 0; i < schema_.num_fields(); ++i) { if (i > 0) { Newline(); Indent(); } else { Indent(); } RETURN_NOT_OK(PrintField(*schema_.field(i))); } if (options_.show_schema_metadata && schema_.metadata() != nullptr) { PrintMetadata("-- schema metadata --", *schema_.metadata()); } Flush(); return Status::OK(); } private: const Schema& schema_; }; Status SchemaPrinter::PrintType(const DataType& type, bool nullable) { Write(type.ToString()); if (!nullable) { Write(" not null"); } for (int i = 0; i < type.num_fields(); ++i) { Newline(); Indent(); std::stringstream ss; ss << "child " << i << ", "; indent_ += options_.indent_size; WriteIndented(ss.str()); RETURN_NOT_OK(PrintField(*type.field(i))); indent_ -= options_.indent_size; } return Status::OK(); } Status SchemaPrinter::PrintField(const Field& field) { Write(field.name()); Write(": "); RETURN_NOT_OK(PrintType(*field.type(), field.nullable())); if (options_.show_field_metadata && field.metadata() != nullptr) { indent_ += options_.indent_size; PrintMetadata("-- field metadata --", *field.metadata()); indent_ -= options_.indent_size; } return Status::OK(); } } // namespace Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options, std::ostream* sink) { SchemaPrinter printer(schema, options, sink); return printer.Print(); } Status PrettyPrint(const Schema& schema, const PrettyPrintOptions& options, std::string* result) { std::ostringstream sink; RETURN_NOT_OK(PrettyPrint(schema, options, &sink)); *result = sink.str(); return Status::OK(); } } // namespace arrow20